// on virial steps the CPU reduction path is taken
// TODO: remove flags.computeEnergy, ref #3128
flags.useGpuFBufferOps = simulationWork.useGpuBufferOps && !(flags.computeVirial || flags.computeEnergy);
- flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.usePmeGpu &&
- (rankHasPmeDuty || simulationWork.useGpuPmePPCommunication));
+ flags.useGpuPmeFReduction = flags.useGpuFBufferOps && (simulationWork.useGpuPme &&
+ (rankHasPmeDuty || simulationWork.useGpuPmePpCommunication));
return flags;
}
const StepWorkload &stepWork = runScheduleWork->stepWork;
- const bool useGpuPmeOnThisRank = simulationWork.usePmeGpu && thisRankHasDuty(cr, DUTY_PME);
+ const bool useGpuPmeOnThisRank = simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME);
const int pmeFlags = makePmeFlags(stepWork);
// Switches on whether to use GPU for position and force buffer operations
* and domain decomposition does not use the graph,
* we do not need to worry about shifting.
*/
- bool reinitGpuPmePpComms = simulationWork.useGpuPmePPCommunication && (stepWork.doNeighborSearch);
- bool sendCoordinatesFromGpu = simulationWork.useGpuPmePPCommunication && !(stepWork.doNeighborSearch);
+ bool reinitGpuPmePpComms = simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch);
+ bool sendCoordinatesFromGpu = simulationWork.useGpuPmePpCommunication && !(stepWork.doNeighborSearch);
gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()),
lambda[efptCOUL], lambda[efptVDW],
(stepWork.computeVirial || stepWork.computeEnergy),
- step, simulationWork.useGpuPmePPCommunication, reinitGpuPmePpComms,
+ step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms,
sendCoordinatesFromGpu, wcycle);
}
#endif /* GMX_MPI */
// If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops
// TODO refactor this and unify with below default-path call to the same function
- if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePPCommunication)
+ if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePpCommunication)
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
*/
- pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePPCommunication, stepWork.useGpuPmeFReduction, wcycle);
+ pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd, simulationWork.useGpuPmePpCommunication, stepWork.useGpuPmeFReduction, wcycle);
}
}
// TODO refactor this and unify with above PME-PP GPU communication path call to the same function
- if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePPCommunication)
+ if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication)
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
*/
pme_receive_force_ener(fr, cr, &forceOut.forceWithVirial(), enerd,
- simulationWork.useGpuPmePPCommunication, false, wcycle);
+ simulationWork.useGpuPmePpCommunication, false, wcycle);
}
if (stepWork.computeForces)
// 2. The proper GPU synchronization is introduced, so that the H2D and D2H data copies can be performed in the separate
// stream owned by the StatePropagatorDataGpu
const auto &simulationWork = runScheduleWork->simulationWork;
- const bool useGpuForPme = simulationWork.usePmeGpu;
+ const bool useGpuForPme = simulationWork.useGpuPme;
const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
// Temporary solution to make sure that the buffer ops are offloaded when update is offloaded
const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
MdrunScheduleWorkload runScheduleWork;
// Also populates the simulation constant workload description.
runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded,
- useGpuForPme,
- (pmeRunMode == PmeRunMode::GPU),
+ pmeRunMode,
useGpuForBonded,
useGpuForUpdate,
devFlags.enableGpuBufferOps,
class SimulationWorkload
{
public:
+ //! If we have calculation of short range nonbondeds on CPU
+ bool useCpuNonbonded = false;
//! If we have calculation of short range nonbondeds on GPU
bool useGpuNonbonded = false;
- //! If we have calculation of long range PME in GPU
+ //! If we have calculation of long range PME on CPU
- bool usePmeGpu = false;
+ bool useCpuPme = false;
+ //! If we have calculation of long range PME in GPU
+ bool useGpuPme = false;
//! If PME FFT solving is done on GPU.
- bool usePmeFftGpu = false;
+ bool useGpuPmeFft = false;
//! If bonded interactions are calculated on GPU.
bool useGpuBonded = false;
//! If update and constraint solving is performed on GPU.
//! If domain decomposition halo exchange is performed on GPU.
bool useGpuHaloExchange = false;
//! If direct PP-PME communication between GPU is used.
- bool useGpuPmePPCommunication = false;
+ bool useGpuPmePpCommunication = false;
//! If direct GPU-GPU communication is enabled.
bool useGpuDirectCommunication = false;
};
#include "decidesimulationworkload.h"
+#include "gromacs/ewald/pme.h"
#include "gromacs/taskassignment/taskassignment.h"
#include "gromacs/utility/arrayref.h"
namespace gmx
{
-SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
- bool useGpuForPme,
- bool useGpuForPmeFft,
- bool useGpuForBonded,
- bool useGpuForUpdateConstraints,
- bool useGpuForBufferOps,
- bool useGpuHaloExchange,
- bool useGpuPmePpComm)
+SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
+ PmeRunMode pmeRunMode,
+ bool useGpuForBonded,
+ bool useGpuForUpdateConstraints,
+ bool useGpuForBufferOps,
+ bool useGpuHaloExchange,
+ bool useGpuPmePpComm)
{
- SimulationWorkload simulationWorkload {
- useGpuForNonbonded,
- useGpuForPme,
- useGpuForPmeFft,
- useGpuForBonded,
- useGpuForUpdateConstraints,
- useGpuForBufferOps,
- useGpuHaloExchange,
- useGpuPmePpComm,
- useGpuHaloExchange || useGpuPmePpComm
- };
+ SimulationWorkload simulationWorkload;
+ simulationWorkload.useCpuNonbonded = !useGpuForNonbonded;
+ simulationWorkload.useGpuNonbonded = useGpuForNonbonded;
+ simulationWorkload.useCpuPme = (pmeRunMode == PmeRunMode::CPU);
+ simulationWorkload.useGpuPme = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed);
+ simulationWorkload.useGpuPmeFft = (pmeRunMode == PmeRunMode::Mixed);
+ simulationWorkload.useGpuBonded = useGpuForBonded;
+ simulationWorkload.useGpuUpdate = useGpuForUpdateConstraints;
+ simulationWorkload.useGpuBufferOps = useGpuForBufferOps;
+ simulationWorkload.useGpuHaloExchange = useGpuHaloExchange;
+ simulationWorkload.useGpuPmePpCommunication = useGpuPmePpComm;
+ simulationWorkload.useGpuDirectCommunication = useGpuHaloExchange || useGpuPmePpComm;
return simulationWorkload;
}
#include "gromacs/mdtypes/simulation_workload.h"
+enum class PmeRunMode;
+
namespace gmx
{
*
* \param[in] useGpuForNonbonded If we have short-range nonbonded interactions
* calculations on GPU(s).
- * \param[in] useGpuForPme If long range PME interactions are calculated on GPU(s).
- * \param[in] useGpuForPmeFft If FFT solving for PME is done on the GPU.
* \param[in] pmeRunMode Run mode indicating what resource PME is executed on.
* \param[in] useGpuForBonded If bonded interactions are calculated on GPU(s).
* \param[in] useGpuForUpdateConstraints If coordinate update and constraint solving is performed on
* GPU(s).
* \param[in] useGpuPmePpComm If GPU direct communication is used in PME-PP communication.
* \returns Simulation lifetime constant workload description.
*/
-SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
- bool useGpuForPme,
- bool useGpuForPmeFft,
- bool useGpuForBonded,
- bool useGpuForUpdateConstraints,
- bool useGpuForBufferOps,
- bool useGpuHaloExchange,
- bool useGpuPmePpComm);
+SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded,
+ PmeRunMode pmeRunMode,
+ bool useGpuForBonded,
+ bool useGpuForUpdateConstraints,
+ bool useGpuForBufferOps,
+ bool useGpuHaloExchange,
+ bool useGpuPmePpComm);
} // namespace gmx