From: Szilárd Páll Date: Mon, 2 Dec 2019 18:29:28 +0000 (+0100) Subject: Allow overlapping CPU force H2D with compute X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=86a27bc2bba63500b679b30c8ad7913fc5425680;p=alexxy%2Fgromacs.git Allow overlapping CPU force H2D with compute The reduction orchestration code already uses an explicit sync event in all cases, and StateGpu implements the ability to schedule force H2D in a separate stream for the "All" locality. Hence, for non-DD runs, this change moves the CPU force H2D transfer to the update stream to allow overlap with force work in the local stream. Refs #3170 #3029 Change-Id: Iceb9aac395335c062109d552d3f0289688a9c75f --- diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 832ff21620..86549f915b 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -1706,9 +1706,16 @@ void do_force(FILE* fplog, // These should be unified. if (haveLocalForceContribInCpuBuffer && !useGpuForcesHaloExchange) { - stateGpu->copyForcesToGpu(forceWithShift, AtomLocality::Local); + // Note: AtomLocality::All is used for the non-DD case because, as in this + // case copyForcesToGpu() uses a separate stream, it allows overlap of + // CPU force H2D with GPU force tasks on all streams including those in the + // local stream which would otherwise be implicit dependencies for the + // transfer and would not overlap. + auto locality = havePPDomainDecomposition(cr) ? AtomLocality::Local : AtomLocality::All; + + stateGpu->copyForcesToGpu(forceWithShift, locality); dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent( - AtomLocality::Local, useGpuFBufOps == BufferOpsUseGpu::True)); + locality, useGpuFBufOps == BufferOpsUseGpu::True)); } if (useGpuForcesHaloExchange) {