From: Szilárd Páll Date: Wed, 26 May 2021 14:06:24 +0000 (+0000) Subject: Eliminate redundant GPU force reduction event dependency X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=db07eb0d880fee6c827418bd5d87cba1c288d5b0;p=alexxy%2Fgromacs.git Eliminate redundant GPU force reduction event dependency CPU force transfers happen on the same stream as GPU reduction, so no explicit dependency is needed. Additionally, due to a StatePropagatorDataGpu bug, whenever the queried force readiness event is not AtomLocality::All, an incorrect event is returned, leading to a circular dependency on force reduction, as described in #4032. This change, however, does not fix the StatePropagatorDataGpu bug, but it should help avoid workarounds for the new SYCL backend (#3932). Refs #4032 #3988 --- diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 67cc9ee160..ea4d010d7a 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -1155,13 +1155,15 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork, } } - if ((runScheduleWork->domainWork.haveCpuLocalForceWork || havePPDomainDecomposition(cr)) - && !runScheduleWork->simulationWork.useGpuHaloExchange) + if (runScheduleWork->domainWork.haveCpuLocalForceWork && !runScheduleWork->simulationWork.useGpuHaloExchange) { - auto forcesReadyLocality = havePPDomainDecomposition(cr) ? 
AtomLocality::Local : AtomLocality::All; - const bool useGpuForceBufferOps = true; - fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency( - stateGpu->getForcesReadyOnDeviceEvent(forcesReadyLocality, useGpuForceBufferOps)); + // in the DD case we use the same stream for H2D and reduction, hence no explicit dependency needed + if (!havePPDomainDecomposition(cr)) + { + const bool useGpuForceBufferOps = true; + fr->gpuForceReduction[gmx::AtomLocality::Local]->addDependency( + stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::All, useGpuForceBufferOps)); + } } if (runScheduleWork->simulationWork.useGpuHaloExchange) @@ -1183,13 +1185,9 @@ static void setupGpuForceReductions(gmx::MdrunScheduleWorkload* runScheduleWork, accumulate); // register forces and add dependencies + // in the DD case we use the same stream for H2D and reduction, hence no explicit dependency needed fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->registerNbnxmForce( Nbnxm::gpu_get_f(nbv->gpu_nbv)); - if (runScheduleWork->domainWork.haveCpuBondedWork || runScheduleWork->domainWork.haveFreeEnergyWork) - { - fr->gpuForceReduction[gmx::AtomLocality::NonLocal]->addDependency( - stateGpu->getForcesReadyOnDeviceEvent(AtomLocality::NonLocal, true)); - } } }