From: Artem Zhmurov Date: Wed, 31 Mar 2021 00:30:33 +0000 (+0000) Subject: Fix conditional on when DtoH forces copy occur X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=f3453fe893db5c5696447977086793bf4423ed01;p=alexxy%2Fgromacs.git Fix conditional on when DtoH forces copy occur Commit d2d4a50b4c636c203028c5bff311924ec15e7825 introduced a performance regression in which forces were copied from device to host on every step. This fixes the issue by reinstating the proper condition on the copy call. Fixes #4001 Refs #2608 --- diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp index afd969ee7f..63f8317443 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp +++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp @@ -815,17 +815,21 @@ void gpu_launch_cpyback(NbnxmGpu* nb, } /* DtoH f */ - static_assert(sizeof(*nbatom->out[0].f.data()) == sizeof(float), - "The host force buffer should be in single precision to match device data size."); - copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + atomsRange.begin(), - &adat->f, - atomsRange.begin(), - atomsRange.size(), - deviceStream, - GpuApiCallBehavior::Async, - bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr); - - issueClFlushInStream(deviceStream); + if (!stepWork.useGpuFBufferOps) + { + static_assert( + sizeof(*nbatom->out[0].f.data()) == sizeof(float), + "The host force buffer should be in single precision to match device data size."); + copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + atomsRange.begin(), + &adat->f, + atomsRange.begin(), + atomsRange.size(), + deviceStream, + GpuApiCallBehavior::Async, + bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr); + + issueClFlushInStream(deviceStream); + } /* After the non-local D2H is launched the nonlocal_done event can be recorded which signals that the local D2H can proceed. This event is not