From 7289ee3f214314d9a4a030d858bcee0a873506c7 Mon Sep 17 00:00:00 2001 From: Paul Bauer Date: Tue, 3 Dec 2019 11:21:47 +0100 Subject: [PATCH] Revert "Enable GPU update with DD when GPU comm features are enabled" This reverts commit b88f3eadf90261555508f6a4df64ce184141a66a. Reason for revert: Mark found the code segfaulting under some conditions with domain decomposition and update enabled, so this is not ready to be used yet. Refs #3226 Change-Id: I45cf9564a87595d445057da071a8fca5c60f9d9a --- admin/builds/gpuupdate-matrix.txt | 17 +++++++++++------ src/gromacs/mdrun/md.cpp | 7 ++----- src/gromacs/mdrun/runner.cpp | 3 +-- src/gromacs/taskassignment/decidegpuusage.cpp | 8 ++------ src/gromacs/taskassignment/decidegpuusage.h | 4 ---- 5 files changed, 16 insertions(+), 23 deletions(-) diff --git a/admin/builds/gpuupdate-matrix.txt b/admin/builds/gpuupdate-matrix.txt index 3e918921ff..5fdbc2ed0f 100644 --- a/admin/builds/gpuupdate-matrix.txt +++ b/admin/builds/gpuupdate-matrix.txt @@ -6,16 +6,21 @@ # Comment line(s) preceding each configuration document the main # intent behind that configuration, so that we can correctly judge # whether to preserve that during maintenance decisions. +# +# Both configurations currently target bs_nix1204, for better load +# balance with pre-submit matrix, which makes heavier use of +# bs_nix1310 agent. 
-# Test GPU update-constraints features on a single PP+PME rank +# Test newest gcc supported by newest CUDA at time of release +# Test thread-MPI with CUDA +# Test GPU update-constraints features in the above combination gcc-8 gpuhw=nvidia nranks=1 gpu_id=1 cuda-10.1 thread-mpi openmp cmake-3.10.0 release-with-assert simd=avx2_256 hwloc libhwloc-2.0.4 gpuupdate -# Test GPU update-constraints features in a CUDA build without CUDA devices +# Test CUDA build on an agent with no CUDA devices +# Test without TNG support +# Test GPU update-constraints features in the above combination gcc-7 gpuhw=none cuda-10.0 openmp no-tng release-with-assert gpuupdate +# Test OpenCL build with gpudev features # Test GPU update-constraints on the OpenCL path where it is unsupported clang-8 openmp gpuhw=amd opencl-1.2 clFFT-2.14 simd=None gpuupdate - -# Test GPU update-constraints features with multiple PP ranks and one PME rank -# Note: this should fall back correctly to the CPU codepath -gcc-5 gpuhw=nvidia cuda-9.0 cmake-3.9.6 thread-mpi npme=1 nranks=3 release-with-assert gpuupdate diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index ee6a81fb82..4f163faf86 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -334,11 +334,8 @@ void gmx::LegacySimulator::do_md() if (useGpuForUpdate) { - GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) - || (simulationWork.useGpuDirectCommunication - && simulationWork.useGpuPmePpCommunication), - "Domain decomposition is not supported with the GPU update when not " - "using direct GPU communication.\n"); + GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr), + "Domain decomposition is not supported with the GPU update.\n"); GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps), "Either PME or short-ranged non-bonded interaction tasks must run on " "the GPU to use GPU update.\n"); diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index a9588189a0..7ce0a14071 100644 --- 
a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -896,8 +896,7 @@ int Mdrunner::mdrunner() try { useGpuForUpdate = decideWhetherToUseGpuForUpdate( - useDomainDecomposition, useGpuForPme, useGpuForNonbonded, - devFlags.enableGpuPmePPComm, devFlags.enableGpuHaloExchange, updateTarget, + useDomainDecomposition, useGpuForPme, useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop, doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0, replExParams.exchangeInterval > 0); } diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp index 8357264383..a83b2b82e0 100644 --- a/src/gromacs/taskassignment/decidegpuusage.cpp +++ b/src/gromacs/taskassignment/decidegpuusage.cpp @@ -491,8 +491,6 @@ bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded, bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition, const bool useGpuForPme, const bool useGpuForNonbonded, - const bool gpuPmePpCommIsEnabled, - const bool gpuHaloExchangeIsEnabled, const TaskTarget updateTarget, const bool gpusWereDetected, const t_inputrec& inputrec, @@ -509,11 +507,9 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition, std::string errorMessage; - if (isDomainDecomposition && (!gpuPmePpCommIsEnabled || !gpuHaloExchangeIsEnabled)) + if (isDomainDecomposition) { - errorMessage += - "Domain decomposition is not supported without GPU halo exchange and GPU PME-PP " - "communication.\n"; + errorMessage += "Domain decomposition is not supported.\n"; } // Using the GPU-version of update if: // 1. 
PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h index 53ce07d665..08ff410b40 100644 --- a/src/gromacs/taskassignment/decidegpuusage.h +++ b/src/gromacs/taskassignment/decidegpuusage.h @@ -234,8 +234,6 @@ bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, * \param[in] isDomainDecomposition Whether there more than one domain. * \param[in] useGpuForPme Whether GPUs will be used for PME interactions. * \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions. - * \param[in] gpuPmePpCommIsEnabled Whether GPU PME-PP communications are enabled in this run - * \param[in] gpuHaloExchangeIsEnabled Whether GPU halo exchange is enabled in this run * \param[in] updateTarget User choice for running simulation on GPU. * \param[in] gpusWereDetected Whether compatible GPUs were detected on any node. * \param[in] inputrec The user input. @@ -251,8 +249,6 @@ bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded, bool decideWhetherToUseGpuForUpdate(bool isDomainDecomposition, bool useGpuForPme, bool useGpuForNonbonded, - bool gpuPmePpCommIsEnabled, - bool gpuHaloExchangeIsEnabled, TaskTarget updateTarget, bool gpusWereDetected, const t_inputrec& inputrec, -- 2.22.0