From: Artem Zhmurov Date: Thu, 24 Oct 2019 17:15:40 +0000 (+0200) Subject: Switch the GPU buffer ops on when update is on GPU X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=6e01b028bc95bb6014d027e2edb6c65715a0572b;p=alexxy%2Fgromacs.git Switch the GPU buffer ops on when update is on GPU The update is supported on the GPU only when buffer ops are also offloaded. This changes the behavior from requiring the GMX_USE_GPU_BUFFER_OPS to be enabled to it being overridden. Change-Id: Icdc154daa053f135b0df503697273016a830fb18 --- diff --git a/admin/builds/gromacs.py b/admin/builds/gromacs.py index 4fa518f186..49007c5a63 100644 --- a/admin/builds/gromacs.py +++ b/admin/builds/gromacs.py @@ -185,7 +185,6 @@ def do_build(context): # GPU update flag enables GPU update+constraints as well as buffer ops (dependency) if context.opts.gpuupdate: - context.env.set_env_var('GMX_USE_GPU_BUFFER_OPS', "1") context.env.set_env_var('GMX_FORCE_UPDATE_DEFAULT_GPU', "1") regressiontests_path = context.workspace.get_project_dir(Project.REGRESSIONTESTS) diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index bf6532b3e0..245d6f9e50 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -885,6 +885,27 @@ int Mdrunner::mdrunner() // and report those features that are enabled. const DevelopmentFeatureFlags devFlags = manageDevelopmentFeatures(mdlog, useGpuForNonbonded, useGpuForPme); + // NOTE: The devFlags need decideWhetherToUseGpusForNonbonded(...) and decideWhetherToUseGpusForPme(...) for overrides, + // decideWhetherToUseGpuForUpdate() needs devFlags for the '-update auto' override, hence the interleaving. + // NOTE: When the simulationWork is constructed, the useGpuForUpdate overrides the devFlags.enableGpuBufferOps. 
+ try + { + useGpuForUpdate = decideWhetherToUseGpuForUpdate(devFlags.forceGpuUpdateDefaultOn, + useDomainDecomposition, + useGpuForPme, + useGpuForNonbonded, + updateTarget, + gpusWereDetected, + *inputrec, + gmx_mtop_interaction_count(mtop, IF_VSITE) > 0, + doEssentialDynamics, + gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, + gmx_mtop_ftype_count(mtop, F_DISRES) > 0, + replExParams.exchangeInterval > 0); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + + // Build restraints. // TODO: hide restraint implementation details from Mdrunner. // There is nothing unique about restraints at this point as far as the @@ -1333,7 +1354,7 @@ int Mdrunner::mdrunner() // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093 if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd)) { - GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1"); + GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps, "Must use GMX_USE_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1"); void *streamLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local); void *streamNonLocal = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal); void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event(); @@ -1545,21 +1566,6 @@ int Mdrunner::mdrunner() fr->cginfo_mb); } - // Before we start the actual simulator, try if we can run the update task on the GPU. 
- useGpuForUpdate = decideWhetherToUseGpuForUpdate(devFlags.forceGpuUpdateDefaultOn, - DOMAINDECOMP(cr), - useGpuForPme, - useGpuForNonbonded, - devFlags.enableGpuBufferOps, - updateTarget, - gpusWereDetected, - *inputrec, - mdAtoms->mdatoms()->haveVsites, - doEssentialDynamics, - gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, - gmx_mtop_ftype_count(mtop, F_DISRES) > 0, - replExParams.exchangeInterval > 0); - const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible( false, inputrec, doRerun, vsite.get(), ms, replExParams, @@ -1568,8 +1574,21 @@ int Mdrunner::mdrunner() const bool useModularSimulator = inputIsCompatibleWithModularSimulator && !(getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr); + // TODO This is not the right place to manage the lifetime of + // this data structure, but currently it's the easiest way to + // make it work. + MdrunScheduleWorkload runScheduleWork; + // Also populates the simulation constant workload description. + runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded, + pmeRunMode, + useGpuForBonded, + useGpuForUpdate, + devFlags.enableGpuBufferOps, + devFlags.enableGpuHaloExchange, + devFlags.enableGpuPmePPComm); + std::unique_ptr stateGpu; - if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || devFlags.enableGpuBufferOps)) + if (gpusWereDetected && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME)) || runScheduleWork.simulationWork.useGpuBufferOps)) { const void *pmeStream = pme_gpu_get_device_stream(fr->pmedata); const void *localStream = fr->nbv->gpu_nbv != nullptr ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local) : nullptr; @@ -1587,20 +1606,6 @@ int Mdrunner::mdrunner() fr->stateGpu = stateGpu.get(); } - // TODO This is not the right place to manage the lifetime of - // this data structure, but currently it's the easiest way to - // make it work. 
- MdrunScheduleWorkload runScheduleWork; - // Also populates the simulation constant workload description. - runScheduleWork.simulationWork = createSimulationWorkload(useGpuForNonbonded, - pmeRunMode, - useGpuForBonded, - useGpuForUpdate, - devFlags.enableGpuBufferOps, - devFlags.enableGpuHaloExchange, - devFlags.enableGpuPmePPComm); - - GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator."); SimulatorBuilder simulatorBuilder; diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp index 71ca58d158..f40814e23a 100644 --- a/src/gromacs/taskassignment/decidegpuusage.cpp +++ b/src/gromacs/taskassignment/decidegpuusage.cpp @@ -494,7 +494,6 @@ bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefaultOn, const bool isDomainDecomposition, const bool useGpuForPme, const bool useGpuForNonbonded, - const bool useGpuForBufferOps, const TaskTarget updateTarget, const bool gpusWereDetected, const t_inputrec &inputrec, @@ -516,10 +515,10 @@ bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefaultOn, { errorMessage += "Domain decomposition is not supported.\n"; } - // Using the GPU-version of update makes sense if forces are already on the GPU, i.e. if at least: - // 1. PME is on the GPU (there should be a copy of coordinates on a GPU in rvec format for PME spread). - // 2. Non-bonded interactions and buffer ops are on the GPU. - if (!(useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps))) + // Using the GPU-version of update if: + // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or + // 2. Non-bonded interactions are on the GPU. 
+ if (!(useGpuForPme || useGpuForNonbonded)) { errorMessage += "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n"; } diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h index c74ca0a197..6b8685fa3e 100644 --- a/src/gromacs/taskassignment/decidegpuusage.h +++ b/src/gromacs/taskassignment/decidegpuusage.h @@ -235,7 +235,6 @@ bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, * \param[in] isDomainDecomposition Whether there more than one domain. * \param[in] useGpuForPme Whether GPUs will be used for PME interactions. * \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions. - * \param[in] useGpuForBufferOps Whether GPUs will be used for buffer operations. * \param[in] updateTarget User choice for running simulation on GPU. * \param[in] gpusWereDetected Whether compatible GPUs were detected on any node. * \param[in] inputrec The user input. @@ -253,7 +252,6 @@ bool decideWhetherToUseGpuForUpdate(bool forceGpuUpdateDefaultOn, bool isDomainDecomposition, bool useGpuForPme, bool useGpuForNonbonded, - bool useGpuForBufferOps, TaskTarget updateTarget, bool gpusWereDetected, const t_inputrec &inputrec, diff --git a/src/gromacs/taskassignment/decidesimulationworkload.cpp b/src/gromacs/taskassignment/decidesimulationworkload.cpp index 597cf1a815..ea82f95f42 100644 --- a/src/gromacs/taskassignment/decidesimulationworkload.cpp +++ b/src/gromacs/taskassignment/decidesimulationworkload.cpp @@ -53,7 +53,7 @@ namespace gmx SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded, PmeRunMode pmeRunMode, bool useGpuForBonded, - bool useGpuForUpdateConstraints, + bool useGpuForUpdate, bool useGpuForBufferOps, bool useGpuHaloExchange, bool useGpuPmePpComm) @@ -65,8 +65,8 @@ SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded, simulationWorkload.useGpuPme = (pmeRunMode == PmeRunMode::GPU || pmeRunMode == PmeRunMode::Mixed); 
simulationWorkload.useGpuPmeFft = (pmeRunMode == PmeRunMode::Mixed); simulationWorkload.useGpuBonded = useGpuForBonded; - simulationWorkload.useGpuUpdate = useGpuForUpdateConstraints; - simulationWorkload.useGpuBufferOps = useGpuForBufferOps; + simulationWorkload.useGpuUpdate = useGpuForUpdate; + simulationWorkload.useGpuBufferOps = useGpuForBufferOps || useGpuForUpdate; simulationWorkload.useGpuHaloExchange = useGpuHaloExchange; simulationWorkload.useGpuPmePpCommunication = useGpuPmePpComm; simulationWorkload.useGpuDirectCommunication = useGpuHaloExchange || useGpuPmePpComm; diff --git a/src/gromacs/taskassignment/decidesimulationworkload.h b/src/gromacs/taskassignment/decidesimulationworkload.h index 0e87da68c5..b389da53c1 100644 --- a/src/gromacs/taskassignment/decidesimulationworkload.h +++ b/src/gromacs/taskassignment/decidesimulationworkload.h @@ -59,8 +59,8 @@ namespace gmx * calculations on GPU(s). * \param[in] pmeRunMode Run mode indicating what resource is PME execured on. * \param[in] useGpuForBonded If bonded interactions are calculated on GPU(s). - * \param[in] useGpuForUpdateConstraints If coordinate update and constraint solving is performed on - * GPU(s). + * \param[in] useGpuForUpdate If coordinate update and constraint solving is performed on + * GPU(s). * \param[in] useGpuForBufferOps If buffer ops / reduction are calculated on GPU(s). * \param[in] useGpuHaloExchange If GPU direct communication is used in halo exchange. * \param[in] useGpuPmePpComm If GPu direct communication is used in PME-PP communication. @@ -69,7 +69,7 @@ namespace gmx SimulationWorkload createSimulationWorkload(bool useGpuForNonbonded, PmeRunMode pmeRunMode, bool useGpuForBonded, - bool useGpuForUpdateConstraints, + bool useGpuForUpdate, bool useGpuForBufferOps, bool useGpuHaloExchange, bool useGpuPmePpComm);