From: Alan Gray Date: Tue, 5 Nov 2019 12:52:21 +0000 (-0800) Subject: Enable GPU update with DD when GPU comm features are enabled X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=b88f3eadf90261555508f6a4df64ce184141a66a;p=alexxy%2Fgromacs.git Enable GPU update with DD when GPU comm features are enabled Changes assertions to allow GPU update to run with domain decomposition, but only when GPU PME-PP communication and GPU halo exchange features are enabled through their respective environment variables. Added test coverage to the gpuupdate matrix. Change-Id: Ie05dac64eada8f704c3a33d0eb6e5bc74dfcc61e --- diff --git a/admin/builds/gpuupdate-matrix.txt b/admin/builds/gpuupdate-matrix.txt index 5fdbc2ed0f..3e918921ff 100644 --- a/admin/builds/gpuupdate-matrix.txt +++ b/admin/builds/gpuupdate-matrix.txt @@ -6,21 +6,16 @@ # Comment line(s) preceding each configuration document the main # intent behind that configuration, so that we can correctly judge # whether to preserve that during maintenance decisions. -# -# Both configurations currently target bs_nix1204, for better load -# balance with pre-submit matrix, which makes heavier use of -# bs_nix1310 agent. 
-# Test newest gcc supported by newest CUDA at time of release -# Test thread-MPI with CUDA -# Test GPU update-constraints features in the above combination +# Test GPU update-constraints features on a single PP+PME rank gcc-8 gpuhw=nvidia nranks=1 gpu_id=1 cuda-10.1 thread-mpi openmp cmake-3.10.0 release-with-assert simd=avx2_256 hwloc libhwloc-2.0.4 gpuupdate -# Test CUDA build on a agent with no CUDA devices -# Test without TNG support -# Test GPU update-constraints features in the above combination +# Test GPU update-constraints features in a CUDA build without CUDA devices gcc-7 gpuhw=none cuda-10.0 openmp no-tng release-with-assert gpuupdate -# Test OpenCL build with gpudev features # Test GPU update-constraints on the OpenCL path where it is unsupported clang-8 openmp gpuhw=amd opencl-1.2 clFFT-2.14 simd=None gpuupdate + +# Test GPU update-constraints features with multiple PP ranks and one PME rank +# Note: this should fall back correctly to the CPU codepath +gcc-5 gpuhw=nvidia cuda-9.0 cmake-3.9.6 thread-mpi npme=1 nranks=3 release-with-assert gpuupdate diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index 4f163faf86..ee6a81fb82 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -334,8 +334,11 @@ void gmx::LegacySimulator::do_md() if (useGpuForUpdate) { - GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr), - "Domain decomposition is not supported with the GPU update.\n"); + GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) + || (simulationWork.useGpuDirectCommunication + && simulationWork.useGpuPmePpCommunication), + "Domain decomposition is not supported with the GPU update when not " + "using direct GPU communication.\n"); GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps), "Either PME or short-ranged non-bonded interaction tasks must run on " "the GPU to use GPU update.\n"); diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 7ce0a14071..a9588189a0 100644 --- 
a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -896,7 +896,8 @@ int Mdrunner::mdrunner() try { useGpuForUpdate = decideWhetherToUseGpuForUpdate( - useDomainDecomposition, useGpuForPme, useGpuForNonbonded, updateTarget, + useDomainDecomposition, useGpuForPme, useGpuForNonbonded, + devFlags.enableGpuPmePPComm, devFlags.enableGpuHaloExchange, updateTarget, gpusWereDetected, *inputrec, mtop, doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, replExParams.exchangeInterval > 0); } diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp index a83b2b82e0..8357264383 100644 --- a/src/gromacs/taskassignment/decidegpuusage.cpp +++ b/src/gromacs/taskassignment/decidegpuusage.cpp @@ -491,6 +491,8 @@ bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded, bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition, const bool useGpuForPme, const bool useGpuForNonbonded, + const bool gpuPmePpCommIsEnabled, + const bool gpuHaloExchangeIsEnabled, const TaskTarget updateTarget, const bool gpusWereDetected, const t_inputrec& inputrec, @@ -507,9 +509,11 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition, std::string errorMessage; - if (isDomainDecomposition) + if (isDomainDecomposition && (!gpuPmePpCommIsEnabled || !gpuHaloExchangeIsEnabled)) { - errorMessage += "Domain decomposition is not supported.\n"; + errorMessage += + "Domain decomposition is not supported without GPU halo exchange and GPU PME-PP " + "communication.\n"; } // Using the GPU-version of update if: // 1. 
PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h index 08ff410b40..53ce07d665 100644 --- a/src/gromacs/taskassignment/decidegpuusage.h +++ b/src/gromacs/taskassignment/decidegpuusage.h @@ -234,6 +234,8 @@ bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, * \param[in] isDomainDecomposition Whether there more than one domain. * \param[in] useGpuForPme Whether GPUs will be used for PME interactions. * \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions. + * \param[in] gpuPmePpCommIsEnabled Whether GPU PME-PP communications are enabled in this run + * \param[in] gpuHaloExchangeIsEnabled Whether GPU halo exchange is enabled in this run * \param[in] updateTarget User choice for running simulation on GPU. * \param[in] gpusWereDetected Whether compatible GPUs were detected on any node. * \param[in] inputrec The user input. @@ -249,6 +251,8 @@ bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, bool decideWhetherToUseGpuForUpdate(bool isDomainDecomposition, bool useGpuForPme, bool useGpuForNonbonded, + bool gpuPmePpCommIsEnabled, + bool gpuHaloExchangeIsEnabled, TaskTarget updateTarget, bool gpusWereDetected, const t_inputrec& inputrec,