#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/device_stream.h"
+#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
* \throws InternalError If any of the required resources could not be initialized.
*/
Impl(const DeviceInformation& deviceInfo,
- bool useGpuForPme,
bool havePpDomainDecomposition,
- bool doGpuPmePpTransfer,
- bool useGpuForUpdate,
+ SimulationWorkload simulationWork,
bool useTiming);
~Impl();
// DeviceStreamManager::Impl
DeviceStreamManager::Impl::Impl(const DeviceInformation& deviceInfo,
- const bool useGpuForPme,
const bool havePpDomainDecomposition,
- const bool doGpuPmePpTransfer,
- const bool useGpuForUpdate,
+ const SimulationWorkload simulationWork,
const bool useTiming) :
context_(deviceInfo)
{
{
streams_[DeviceStreamType::NonBondedLocal].init(context_, DeviceStreamPriority::Normal, useTiming);
- if (useGpuForPme)
+ if (simulationWork.useGpuPme)
{
/* Creating a PME GPU stream:
* - default high priority with CUDA
useTiming);
}
// Update stream is used both for coordinates transfers and for GPU update/constraints
- if (useGpuForPme || useGpuForUpdate)
+ if (simulationWork.useGpuPme || simulationWork.useGpuUpdate || simulationWork.useGpuBufferOps)
{
streams_[DeviceStreamType::UpdateAndConstraints].init(
context_, DeviceStreamPriority::Normal, useTiming);
}
- if (doGpuPmePpTransfer)
+ if (simulationWork.useGpuPmePpCommunication)
{
streams_[DeviceStreamType::PmePpTransfer].init(context_, DeviceStreamPriority::Normal, useTiming);
}
// DeviceStreamManager
// Public constructor of DeviceStreamManager, shown in patch form: the '-' lines are the
// individual boolean parameters being removed and the '+' lines are their replacement,
// a single SimulationWorkload argument.
// The constructor does nothing except allocate the Impl, handing over deviceInfo,
// havePpDomainDecomposition, simulationWork and useTiming unchanged.
DeviceStreamManager::DeviceStreamManager(const DeviceInformation& deviceInfo,
- const bool useGpuForPme,
const bool havePpDomainDecomposition,
- const bool doGpuPmePpTransfer,
- const bool useGpuForUpdate,
+ const SimulationWorkload simulationWork, // taken by value and passed on to Impl as-is
const bool useTiming) :
- impl_(new Impl(deviceInfo, useGpuForPme, havePpDomainDecomposition, doGpuPmePpTransfer, useGpuForUpdate, useTiming))
+ impl_(new Impl(deviceInfo, havePpDomainDecomposition, simulationWork, useTiming))
{
    // Empty body: everything is done by the Impl constructor invoked in the initializer list.
}
#include <gtest/gtest.h>
+#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "gputest.h"
{
SCOPED_TRACE("No DD, no PME rank, no GPU update");
- bool useGpuForPme = false;
+ SimulationWorkload simulationWork;
+ simulationWork.useGpuPme = false;
+ simulationWork.useGpuPmePpCommunication = false;
+ simulationWork.useGpuUpdate = false;
bool havePpDomainDecomposition = false;
- bool doGpuPmePpTransfer = false;
- bool useGpuForUpdate = false;
- DeviceStreamManager manager(*deviceInfo, useGpuForPme, havePpDomainDecomposition,
- doGpuPmePpTransfer, useGpuForUpdate, useTiming);
+ DeviceStreamManager manager(*deviceInfo, havePpDomainDecomposition, simulationWork, useTiming);
expectValidStreams(&manager, { DeviceStreamType::NonBondedLocal });
expectInvalidStreams(&manager, { DeviceStreamType::NonBondedNonLocal,
{
SCOPED_TRACE("With DD, no PME rank, no GPU update");
- bool useGpuForPme = false;
+ SimulationWorkload simulationWork;
+ simulationWork.useGpuPme = false;
+ simulationWork.useGpuPmePpCommunication = false;
+ simulationWork.useGpuUpdate = false;
bool havePpDomainDecomposition = true;
- bool doGpuPmePpTransfer = false;
- bool useGpuForUpdate = false;
- DeviceStreamManager manager(*deviceInfo, useGpuForPme, havePpDomainDecomposition,
- doGpuPmePpTransfer, useGpuForUpdate, useTiming);
+ DeviceStreamManager manager(*deviceInfo, havePpDomainDecomposition, simulationWork, useTiming);
expectValidStreams(&manager, { DeviceStreamType::NonBondedLocal,
DeviceStreamType::NonBondedNonLocal });
{
SCOPED_TRACE("No DD, with PME rank, no GPU update");
- bool useGpuForPme = true;
+ SimulationWorkload simulationWork;
+ simulationWork.useGpuPme = true;
+ simulationWork.useGpuPmePpCommunication = true;
+ simulationWork.useGpuUpdate = false;
bool havePpDomainDecomposition = false;
- bool doGpuPmePpTransfer = true;
- bool useGpuForUpdate = false;
- DeviceStreamManager manager(*deviceInfo, useGpuForPme, havePpDomainDecomposition,
- doGpuPmePpTransfer, useGpuForUpdate, useTiming);
+ DeviceStreamManager manager(*deviceInfo, havePpDomainDecomposition, simulationWork, useTiming);
expectValidStreams(&manager, { DeviceStreamType::Pme, DeviceStreamType::NonBondedLocal,
DeviceStreamType::PmePpTransfer,
{
SCOPED_TRACE("With DD, with PME rank, no GPU update");
- bool useGpuForPme = true;
+ SimulationWorkload simulationWork;
+ simulationWork.useGpuPme = true;
+ simulationWork.useGpuPmePpCommunication = true;
+ simulationWork.useGpuUpdate = false;
bool havePpDomainDecomposition = true;
- bool doGpuPmePpTransfer = true;
- bool useGpuForUpdate = false;
- DeviceStreamManager manager(*deviceInfo, useGpuForPme, havePpDomainDecomposition,
- doGpuPmePpTransfer, useGpuForUpdate, useTiming);
+ DeviceStreamManager manager(*deviceInfo, havePpDomainDecomposition, simulationWork, useTiming);
expectValidStreams(&manager, { DeviceStreamType::Pme, DeviceStreamType::NonBondedLocal,
DeviceStreamType::NonBondedNonLocal, DeviceStreamType::PmePpTransfer,
{
SCOPED_TRACE("No DD, no PME rank, with GPU update");
- bool useGpuForPme = false;
+ SimulationWorkload simulationWork;
+ simulationWork.useGpuPme = false;
+ simulationWork.useGpuPmePpCommunication = false;
+ simulationWork.useGpuUpdate = true;
bool havePpDomainDecomposition = false;
- bool doGpuPmePpTransfer = false;
- bool useGpuForUpdate = true;
- DeviceStreamManager manager(*deviceInfo, useGpuForPme, havePpDomainDecomposition,
- doGpuPmePpTransfer, useGpuForUpdate, useTiming);
+ DeviceStreamManager manager(*deviceInfo, havePpDomainDecomposition, simulationWork, useTiming);
expectValidStreams(&manager, { DeviceStreamType::NonBondedLocal,
DeviceStreamType::UpdateAndConstraints });
{
SCOPED_TRACE("With DD, no PME rank, with GPU update");
- bool useGpuForPme = false;
+ SimulationWorkload simulationWork;
+ simulationWork.useGpuPme = false;
+ simulationWork.useGpuPmePpCommunication = false;
+ simulationWork.useGpuUpdate = true;
bool havePpDomainDecomposition = true;
- bool doGpuPmePpTransfer = false;
- bool useGpuForUpdate = true;
- DeviceStreamManager manager(*deviceInfo, useGpuForPme, havePpDomainDecomposition,
- doGpuPmePpTransfer, useGpuForUpdate, useTiming);
+ DeviceStreamManager manager(*deviceInfo, havePpDomainDecomposition, simulationWork, useTiming);
expectValidStreams(&manager, { DeviceStreamType::NonBondedLocal, DeviceStreamType::NonBondedNonLocal,
DeviceStreamType::UpdateAndConstraints });
{
SCOPED_TRACE("No DD, with PME rank, with GPU update");
- bool useGpuForPme = true;
+ SimulationWorkload simulationWork;
+ simulationWork.useGpuPme = true;
+ simulationWork.useGpuPmePpCommunication = true;
+ simulationWork.useGpuUpdate = true;
bool havePpDomainDecomposition = false;
- bool doGpuPmePpTransfer = true;
- bool useGpuForUpdate = true;
- DeviceStreamManager manager(*deviceInfo, useGpuForPme, havePpDomainDecomposition,
- doGpuPmePpTransfer, useGpuForUpdate, useTiming);
+ DeviceStreamManager manager(*deviceInfo, havePpDomainDecomposition, simulationWork, useTiming);
expectValidStreams(&manager, { DeviceStreamType::Pme, DeviceStreamType::NonBondedLocal,
DeviceStreamType::PmePpTransfer,
{
SCOPED_TRACE("With DD, with PME rank, with GPU update");
- bool useGpuForPme = true;
+ SimulationWorkload simulationWork;
+ simulationWork.useGpuPme = true;
+ simulationWork.useGpuPmePpCommunication = true;
+ simulationWork.useGpuUpdate = true;
bool havePpDomainDecomposition = true;
- bool doGpuPmePpTransfer = true;
- bool useGpuForUpdate = true;
- DeviceStreamManager manager(*deviceInfo, useGpuForPme, havePpDomainDecomposition,
- doGpuPmePpTransfer, useGpuForUpdate, useTiming);
+ DeviceStreamManager manager(*deviceInfo, havePpDomainDecomposition, simulationWork, useTiming);
expectValidStreams(&manager, { DeviceStreamType::Pme, DeviceStreamType::NonBondedLocal,
DeviceStreamType::NonBondedNonLocal, DeviceStreamType::PmePpTransfer,
const bool printHostName = (cr->nnodes > 1);
gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
+ MdrunScheduleWorkload runScheduleWork;
+ // Also populates the simulation constant workload description.
+ runScheduleWork.simulationWork = createSimulationWorkload(
+ *inputrec, useGpuForNonbonded, pmeRunMode, useGpuForBonded, useGpuForUpdate,
+ devFlags.enableGpuBufferOps, devFlags.enableGpuHaloExchange, devFlags.enableGpuPmePPComm);
+
std::unique_ptr<DeviceStreamManager> deviceStreamManager = nullptr;
if (deviceInfo != nullptr)
dd_setup_dlb_resource_sharing(cr, deviceId);
}
deviceStreamManager = std::make_unique<DeviceStreamManager>(
- *deviceInfo, useGpuForPme, useGpuForNonbonded, havePPDomainDecomposition(cr),
- useGpuForUpdate, useTiming);
+ *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming);
}
// If the user chose a task assignment, give them some hints
// Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus
// any of the GPU communication features are active.
if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI
- && (devFlags.enableGpuHaloExchange || devFlags.enableGpuPmePPComm))
+ && (runScheduleWork.simulationWork.useGpuHaloExchange
+ || runScheduleWork.simulationWork.useGpuPmePpCommunication))
{
setupGpuDevicePeerAccess(gpuIdsToUse, mdlog);
}
// TODO: Forcerec is not a correct place to store it.
fr->deviceStreamManager = deviceStreamManager.get();
- if (devFlags.enableGpuPmePPComm && !thisRankHasDuty(cr, DUTY_PME))
+ if (runScheduleWork.simulationWork.useGpuPmePpCommunication && !thisRankHasDuty(cr, DUTY_PME))
{
GMX_RELEASE_ASSERT(
deviceStreamManager != nullptr,
deviceStreamManager->stream(DeviceStreamType::PmePpTransfer));
}
- fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo, useGpuForNonbonded,
+ fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo,
+ runScheduleWork.simulationWork.useGpuNonbonded,
deviceStreamManager.get(), &mtop, box, wcycle);
// TODO: Move the logic below to a GPU bonded builder
- if (useGpuForBonded)
+ if (runScheduleWork.simulationWork.useGpuBonded)
{
GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
"GPU device stream manager should be valid in order to use GPU "
try
{
// TODO: This should be in the builder.
- GMX_RELEASE_ASSERT(!useGpuForPme || (deviceStreamManager != nullptr),
+ GMX_RELEASE_ASSERT(!runScheduleWork.simulationWork.useGpuPme
+ || (deviceStreamManager != nullptr),
"Device stream manager should be valid in order to use GPU "
"version of PME.");
GMX_RELEASE_ASSERT(
- !useGpuForPme || deviceStreamManager->streamIsValid(DeviceStreamType::Pme),
+ !runScheduleWork.simulationWork.useGpuPme
+ || deviceStreamManager->streamIsValid(DeviceStreamType::Pme),
"GPU PME stream should be valid in order to use GPU version of PME.");
- const DeviceContext* deviceContext =
- useGpuForPme ? &deviceStreamManager->context() : nullptr;
+ const DeviceContext* deviceContext = runScheduleWork.simulationWork.useGpuPme
+ ? &deviceStreamManager->context()
+ : nullptr;
const DeviceStream* pmeStream =
- useGpuForPme ? &deviceStreamManager->stream(DeviceStreamType::Pme) : nullptr;
+ runScheduleWork.simulationWork.useGpuPme
+ ? &deviceStreamManager->stream(DeviceStreamType::Pme)
+ : nullptr;
pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec.get(),
nChargePerturbed != 0, nTypePerturbed != 0,
domdecOptions.checkBondedInteractions, fr->cginfo_mb);
}
- // TODO This is not the right place to manage the lifetime of
- // this data structure, but currently it's the easiest way to
- // make it work.
- MdrunScheduleWorkload runScheduleWork;
- // Also populates the simulation constant workload description.
- runScheduleWork.simulationWork =
- createSimulationWorkload(*inputrec, useGpuForNonbonded, pmeRunMode, useGpuForBonded,
- useGpuForUpdate, devFlags.enableGpuBufferOps,
- devFlags.enableGpuHaloExchange, devFlags.enableGpuPmePPComm);
-
std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
if (gpusWereDetected
- && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME))
+ && ((runScheduleWork.simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME))
|| runScheduleWork.simulationWork.useGpuBufferOps))
{
GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)