# This matrix is intended to permit Jenkins on-demand testing
-# of code hidden behind the GMX_USE_BUFFER_OPS feature flag
-# during development. When the feature flag is removed, the
-# normal test matrices will be adapted to cover this code path.
+# of GPU update code path during development.
#
# Comment line(s) preceding each configuration document the main
# intent behind that configuration, so that we can correctly judge
# whether to preserve that during maintenance decisions.
-#
-# Both configurations currently target bs_nix1204, for better load
-# balance with pre-submit matrix, which makes heavier use of
-# bs_nix1310 agent.
-# Test newest gcc supported by newest CUDA at time of release
-# Test thread-MPI with CUDA
-# Test GPU update-constraints features in the above combination
+# Test GPU update-constraints features on a single PP+PME rank
gcc-8 gpuhw=nvidia nranks=1 gpu_id=1 cuda-10.1 thread-mpi openmp cmake-3.10.0 release-with-assert simd=avx2_256 hwloc libhwloc-2.0.4 gpuupdate
-# Test CUDA build on a agent with no CUDA devices
-# Test without TNG support
-# Test GPU update-constraints features in the above combination
+# Test GPU update-constraints features in a CUDA build without CUDA devices
gcc-7 gpuhw=none cuda-10.0 openmp no-tng release-with-assert gpuupdate
-# Test OpenCL build with gpudev features
# Test GPU update-constraints on the OpenCL path where it is unsupported
clang-8 openmp gpuhw=amd opencl-1.2 clFFT-2.14 simd=None gpuupdate
+
+# Test GPU update-constraints features with multiple PP ranks and one PME rank
+# Note: this should fall back correctly to the CPU codepath
+gcc-5 gpuhw=nvidia cuda-9.0 cmake-3.9.6 thread-mpi npme=1 nranks=3 release-with-assert gpuupdate
return dd.comm->systemInfo.haveSplitConstraints;
}
+bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
+{
+ return dd.comm->systemInfo.useUpdateGroups;
+}
+
void dd_cycles_add(const gmx_domdec_t* dd, float cycles, int ddCycl)
{
/* Note that the cycles value can be incorrect, either 0 or some
/*! \brief Return whether constraints, not including settles, cross domain boundaries */
bool ddHaveSplitConstraints(const gmx_domdec_t& dd);
+/*! \brief Return whether update groups are used */
+bool ddUsesUpdateGroups(const gmx_domdec_t& dd);
+
/*! \brief Return whether the DD has a single dimension with a single pulse
*
* The GPU halo exchange code requires a 1D single-pulse DD, and its
}
}
- // Copy coordinate from the GPU if update is on the GPU and there are forces to be computed on
- // the CPU, or for the computation of virial. At search steps the current coordinates are
- // already on the host, hence copy is not needed.
+ // TODO Update this comment when introducing SimulationWorkload
+ //
+ // The conditions for gpuHaloExchange e.g. using GPU buffer
+ // operations were checked before construction, so here we can
+ // just use it and assert upon any conditions.
+ gmx::GpuHaloExchange* gpuHaloExchange =
+ (havePPDomainDecomposition(cr) ? cr->dd->gpuHaloExchange.get() : nullptr);
+ const bool ddUsesGpuDirectCommunication = (gpuHaloExchange != nullptr);
+ GMX_ASSERT(!ddUsesGpuDirectCommunication || (useGpuXBufOps == BufferOpsUseGpu::True),
+ "Must use coordinate buffer ops with GPU halo exchange");
+ const bool useGpuForcesHaloExchange =
+ ddUsesGpuDirectCommunication && (useGpuFBufOps == BufferOpsUseGpu::True);
+
+ // Copy coordinate from the GPU if update is on the GPU and there
+ // are forces to be computed on the CPU, or for the computation of
+ // virial, or if host-side data will be transferred from this task
+ // to a remote task for halo exchange or PME-PP communication. At
+ // search steps the current coordinates are already on the host,
+ // hence copy is not needed.
+ const bool haveHostPmePpComms =
+ !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication;
+ const bool haveHostHaloExchangeComms = havePPDomainDecomposition(cr) && !ddUsesGpuDirectCommunication;
if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch
- && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial))
+ && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial
+ || haveHostPmePpComms || haveHostHaloExchangeComms))
{
stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::Local);
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
launchPmeGpuFftAndGather(fr->pmedata, wcycle);
}
- // TODO Update this comment when introducing SimulationWorkload
- //
- // The conditions for gpuHaloExchange e.g. using GPU buffer
- // operations were checked before construction, so here we can
- // just use it and assert upon any conditions.
- gmx::GpuHaloExchange* gpuHaloExchange =
- (havePPDomainDecomposition(cr) ? cr->dd->gpuHaloExchange.get() : nullptr);
- const bool ddUsesGpuDirectCommunication = (gpuHaloExchange != nullptr);
- GMX_ASSERT(!ddUsesGpuDirectCommunication || (useGpuXBufOps == BufferOpsUseGpu::True),
- "Must use coordinate buffer ops with GPU halo exchange");
- const bool useGpuForcesHaloExchange =
- ddUsesGpuDirectCommunication && (useGpuFBufOps == BufferOpsUseGpu::True);
-
/* Communicate coordinates and sum dipole if necessary +
do non-local pair search */
if (havePPDomainDecomposition(cr))
wallcycle_stop(wcycle, ewcFORCE);
}
- // If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops
- // TODO refoactor this and unify with below default-path call to the same function
- if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && simulationWork.useGpuPmePpCommunication)
+ // If on GPU PME-PP comms or GPU update path, receive forces from PME before GPU buffer ops
+ // TODO refactor this and unify with below default-path call to the same function
+ if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME)
+ && (simulationWork.useGpuPmePpCommunication || simulationWork.useGpuUpdate))
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
}
}
- // TODO refoactor this and unify with above PME-PP GPU communication path call to the same function
- if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication)
+ // TODO refactor this and unify with above GPU PME-PP / GPU update path call to the same function
+ if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication
+ && !simulationWork.useGpuUpdate)
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
if (useGpuForUpdate)
{
- GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr),
- "Domain decomposition is not supported with the GPU update.\n");
+ GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
+ || constr->numConstraintsTotal() == 0,
+ "Constraints in domain decomposition are only supported with update "
+ "groups if using GPU update.\n");
+ GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
+ || constr->numConstraintsTotal() == 0,
+ "SHAKE is not supported with GPU update.");
GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
"Either PME or short-ranged non-bonded interaction tasks must run on "
"the GPU to use GPU update.\n");
#include "gromacs/mdlib/qmmm.h"
#include "gromacs/mdlib/sighandler.h"
#include "gromacs/mdlib/stophandler.h"
+#include "gromacs/mdlib/updategroups.h"
#include "gromacs/mdrun/mdmodules.h"
#include "gromacs/mdrun/simulationcontext.h"
#include "gromacs/mdrunutility/handlerestart.h"
//! True if the Buffer ops development feature is enabled
// TODO: when the trigger of the buffer ops offload is fully automated this should go away
bool enableGpuBufferOps = false;
+ //! If true, forces 'mdrun -update auto' default to 'gpu' when running with DD
+ bool forceGpuUpdateDefaultWithDD = false;
//! True if the GPU halo exchange development feature is enabled
bool enableGpuHaloExchange = false;
//! True if the PME PP direct communication GPU development feature is enabled
#pragma GCC diagnostic ignored "-Wunused-result"
devFlags.enableGpuBufferOps = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr)
&& (GMX_GPU == GMX_GPU_CUDA) && useGpuForNonbonded;
+ devFlags.forceGpuUpdateDefaultWithDD = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr);
devFlags.enableGpuHaloExchange =
(getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
devFlags.enableGpuPmePPComm =
"GMX_USE_GPU_BUFFER_OPS environment variable.");
}
+ if (devFlags.forceGpuUpdateDefaultWithDD)
+ {
+ GMX_LOG(mdlog.warning)
+ .asParagraph()
+ .appendTextFormatted(
+ "NOTE: This run will default to '-update gpu' as requested by the "
+ "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable.");
+ }
+
if (devFlags.enableGpuHaloExchange)
{
if (useGpuForNonbonded)
const DevelopmentFeatureFlags devFlags =
manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode);
- // NOTE: The devFlags need decideWhetherToUseGpusForNonbonded(...) and decideWhetherToUseGpusForPme(...) for overrides,
- // decideWhetherToUseGpuForUpdate() needs devFlags for the '-update auto' override, hence the interleaving.
- // NOTE: When the simulationWork is constructed, the useGpuForUpdate overrides the devFlags.enableGpuBufferOps.
- try
- {
- useGpuForUpdate = decideWhetherToUseGpuForUpdate(
- useDomainDecomposition, useGpuForPme, useGpuForNonbonded, updateTarget,
- gpusWereDetected, *inputrec, mtop, doEssentialDynamics,
- gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, replExParams.exchangeInterval > 0, doRerun);
- }
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-
-
// Build restraints.
// TODO: hide restraint implementation details from Mdrunner.
// There is nothing unique about restraints at this point as far as the
// Note that local state still does not exist yet.
}
+ // The GPU update is decided here because we need to know whether the constraints or
+ // SETTLEs can span accross the domain borders (i.e. whether or not update groups are
+ // defined). This is only known after DD is initialized, hence decision on using GPU
+ // update is done so late.
+ try
+ {
+ const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false;
+
+ useGpuForUpdate = decideWhetherToUseGpuForUpdate(
+ devFlags.forceGpuUpdateDefaultWithDD, useDomainDecomposition, useUpdateGroups,
+ useGpuForPme, useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop,
+ doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
+ replExParams.exchangeInterval > 0, doRerun);
+ }
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
+
if (PAR(cr))
{
/* After possible communicator splitting in make_dd_communicators.
return gpusWereDetected && usingOurCpuForPmeOrEwald;
}
-bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
+bool decideWhetherToUseGpuForUpdate(const bool forceGpuUpdateDefaultWithDD,
+ const bool isDomainDecomposition,
+ const bool useUpdateGroups,
const bool useGpuForPme,
const bool useGpuForNonbonded,
const TaskTarget updateTarget,
return false;
}
+ const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
+
std::string errorMessage;
- if (isDomainDecomposition)
+ if (isDomainDecomposition && hasAnyConstraints && !useUpdateGroups)
+ {
+ errorMessage +=
+ "Domain decomposition is only supported with constraints when update groups are "
+ "used. This means constraining all bonds is not supported, except for small "
+ "molecules, and box sizes close to half the pair-list cutoff are not supported.\n ";
+ }
+ if (inputrec.eConstrAlg == econtSHAKE && hasAnyConstraints && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
{
- errorMessage += "Domain decomposition is not supported.\n";
+ errorMessage += "SHAKE constraints are not supported.\n";
}
// Using the GPU-version of update if:
// 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
return false;
}
+ if (isDomainDecomposition)
+ {
+ return forceGpuUpdateDefaultWithDD;
+ }
+
return true;
}
/*! \brief Decide whether to use GPU for update.
*
- * \param[in] isDomainDecomposition Whether there more than one domain.
- * \param[in] useGpuForPme Whether GPUs will be used for PME interactions.
- * \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions.
- * \param[in] updateTarget User choice for running simulation on GPU.
- * \param[in] gpusWereDetected Whether compatible GPUs were detected on any node.
- * \param[in] inputrec The user input.
- * \param[in] mtop The global topology.
- * \param[in] useEssentialDynamics If essential dynamics is active.
- * \param[in] doOrientationRestraints If orientation restraints are enabled.
- * \param[in] useReplicaExchange If this is a REMD simulation.
- * \param[in] doRerun It this is a rerun.
+ * \param[in] forceGpuUpdateDefaultWithDD If update should run on GPU with DD by default.
+ * \param[in] isDomainDecomposition Whether there more than one domain.
+ * \param[in] useUpdateGroups If the constraints can be split across domains.
+ * \param[in] useGpuForPme Whether GPUs will be used for PME interactions.
+ * \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions.
+ * \param[in] updateTarget User choice for running simulation on GPU.
+ * \param[in] gpusWereDetected Whether compatible GPUs were detected on any node.
+ * \param[in] inputrec The user input.
+ * \param[in] mtop The global topology.
+ * \param[in] useEssentialDynamics If essential dynamics is active.
+ * \param[in] doOrientationRestraints If orientation restraints are enabled.
+ * \param[in] useReplicaExchange If this is a REMD simulation.
+ * \param[in] doRerun It this is a rerun.
*
* \returns Whether complete simulation can be run on GPU.
* \throws std::bad_alloc If out of memory
* InconsistentInputError If the user requirements are inconsistent.
*/
-bool decideWhetherToUseGpuForUpdate(bool isDomainDecomposition,
+bool decideWhetherToUseGpuForUpdate(bool forceGpuUpdateDefaultWithDD,
+ bool isDomainDecomposition,
+ bool useUpdateGroups,
bool useGpuForPme,
bool useGpuForNonbonded,
TaskTarget updateTarget,