From f3453fe893db5c5696447977086793bf4423ed01 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Wed, 31 Mar 2021 00:30:33 +0000 Subject: [PATCH] Fix conditional on when DtoH forces copy occurs d2d4a50b4c636c203028c5bff311924ec15e7825 introduced a performance regression, with forces being copied from device to host on each step. This fixes the issue by reinstating the proper condition on the copy call. Fixes #4001 Refs #2608 --- src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp | 26 +++++++++++++---------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp index afd969ee7f..63f8317443 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp @@ -815,17 +815,21 @@ void gpu_launch_cpyback(NbnxmGpu* nb, } /* DtoH f */ - static_assert(sizeof(*nbatom->out[0].f.data()) == sizeof(float), - "The host force buffer should be in single precision to match device data size."); - copyFromDeviceBuffer(reinterpret_cast(nbatom->out[0].f.data()) + atomsRange.begin(), - &adat->f, - atomsRange.begin(), - atomsRange.size(), - deviceStream, - GpuApiCallBehavior::Async, - bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr); - - issueClFlushInStream(deviceStream); + if (!stepWork.useGpuFBufferOps) + { + static_assert( + sizeof(*nbatom->out[0].f.data()) == sizeof(float), + "The host force buffer should be in single precision to match device data size."); + copyFromDeviceBuffer(reinterpret_cast(nbatom->out[0].f.data()) + atomsRange.begin(), + &adat->f, + atomsRange.begin(), + atomsRange.size(), + deviceStream, + GpuApiCallBehavior::Async, + bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr); + + issueClFlushInStream(deviceStream); + } /* After the non-local D2H is launched the nonlocal_done event can be recorded which signals that the local D2H can proceed. This event is not -- 2.22.0