update and constraints can be offloaded to a GPU with CUDA. Thus all compute
intensive parts of a simulation can be offloaded, which provides
better performance when using a fast GPU combined with a slow CPU.
+By default, update runs on the CPU. To run it on the GPU in single-rank simulations,
+use the new '-update gpu' command-line option.
For use with domain decomposition, please see below.
GPU Direct Communications
//! True if the Buffer ops development feature is enabled
// TODO: when the trigger of the buffer ops offload is fully automated this should go away
bool enableGpuBufferOps = false;
- //! If true, forces 'mdrun -update auto' default to 'gpu' when running with DD
- bool forceGpuUpdateDefaultWithDD = false;
+ //! If true, forces 'mdrun -update auto' default to 'gpu'
+ bool forceGpuUpdateDefault = false;
//! True if the GPU halo exchange development feature is enabled
bool enableGpuHaloExchange = false;
//! True if the PME PP direct communication GPU development feature is enabled
#pragma GCC diagnostic ignored "-Wunused-result"
devFlags.enableGpuBufferOps = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr)
&& (GMX_GPU == GMX_GPU_CUDA) && useGpuForNonbonded;
- devFlags.forceGpuUpdateDefaultWithDD = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr);
+ devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr);
devFlags.enableGpuHaloExchange =
(getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
devFlags.enableGpuPmePPComm =
"GMX_USE_GPU_BUFFER_OPS environment variable.");
}
- if (devFlags.forceGpuUpdateDefaultWithDD)
+ if (devFlags.forceGpuUpdateDefault)
{
GMX_LOG(mdlog.warning)
.asParagraph()
const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false;
useGpuForUpdate = decideWhetherToUseGpuForUpdate(
- devFlags.forceGpuUpdateDefaultWithDD, useDomainDecomposition, useUpdateGroups,
- useGpuForPme, useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop,
- doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
- replExParams.exchangeInterval > 0, doRerun);
+ devFlags.forceGpuUpdateDefault, useDomainDecomposition, useUpdateGroups, useGpuForPme,
+ useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop, doEssentialDynamics,
+ gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, replExParams.exchangeInterval > 0, doRerun);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
return gpusWereDetected && usingOurCpuForPmeOrEwald;
}
-bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefaultWithDD,
+bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefault,
const bool isDomainDecomposition,
const bool useUpdateGroups,
const bool useGpuForPme,
const bool doRerun)
{
- if (updateTarget == TaskTarget::Cpu)
+ // '-update cpu' overrides the environment variable, '-update auto' does not
+ if (updateTarget == TaskTarget::Cpu || (updateTarget == TaskTarget::Auto && !forceGpuUpdateDefault))
{
return false;
}
if (isDomainDecomposition)
{
- if (!forceGpuUpdateDefaultWithDD)
+ if (!forceGpuUpdateDefault)
{
errorMessage += "Domain decomposition is not supported.\n ";
}
if (isDomainDecomposition)
{
- return forceGpuUpdateDefaultWithDD;
+ return forceGpuUpdateDefault;
+ }
+ else
+ {
+ return (updateTarget == TaskTarget::Gpu || forceGpuUpdateDefault);
}
-
- return true;
}
} // namespace gmx
/*! \brief Decide whether to use GPU for update.
*
- * \param[in] forceGpuUpdateDefaultWithDD If update should run on GPU with DD by default.
+ * \param[in] forceGpuUpdateDefault If update should run on GPU by default.
* \param[in] isDomainDecomposition Whether there is more than one domain.
* \param[in] useUpdateGroups If the constraints can be split across domains.
* \param[in] useGpuForPme Whether GPUs will be used for PME interactions.
* \throws std::bad_alloc If out of memory
* InconsistentInputError If the user requirements are inconsistent.
*/
-bool decideWhetherToUseGpuForUpdate(bool forceGpuUpdateDefaultWithDD,
+bool decideWhetherToUseGpuForUpdate(bool forceGpuUpdateDefault,
bool isDomainDecomposition,
bool useUpdateGroups,
bool useGpuForPme,