#include "gromacs/gpu_utils/device_stream.h"
#include "gromacs/gpu_utils/devicebuffer.h"
-#if GMX_GPU_CUDA
-# include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
-#elif GMX_GPU_SYCL
-# include "gromacs/gpu_utils/gpueventsynchronizer_sycl.h"
-#endif
+#include "gromacs/gpu_utils/gpueventsynchronizer.h"
#include "gromacs/mdlib/gpuforcereduction_impl_internal.h"
#include "gromacs/utility/gmxassert.h"
const bool accumulate,
GpuEventSynchronizer* completionMarker)
{
- GMX_ASSERT((baseForcePtr != nullptr), "Input base force for reduction has no data");
+ GMX_ASSERT(baseForcePtr, "Input base force for reduction has no data");
baseForce_ = baseForcePtr;
numAtoms_ = numAtoms;
atomStart_ = atomStart;
- accumulate_ = static_cast<int>(accumulate);
+ accumulate_ = accumulate;
completionMarker_ = completionMarker;
cellInfo_.cell = cell.data();
rvecForceToAdd_ = forcePtr;
};
// Registers a dependency by appending the given synchronizer pointer to dependencyList_.
// Only the pointer is stored; this call itself does not wait on or consume the event.
// NOTE(review): there is no null check here, and entries of dependencyList_ are
// dereferenced where the list is iterated elsewhere in this file -- confirm that
// callers never pass nullptr.
-void GpuForceReduction::Impl::addDependency(GpuEventSynchronizer* const dependency)
+void GpuForceReduction::Impl::addDependency(GpuEventSynchronizer* dependency)
{
dependencyList_.push_back(dependency);
}
wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuNBFBufOps);
- if (numAtoms_ == 0)
+ if (numAtoms_ != 0)
{
- return;
+ GMX_ASSERT(nbnxmForceToAdd_, "Nbnxm force for reduction has no data");
+
+ // Enqueue wait on all dependencies passed
+ for (auto* synchronizer : dependencyList_)
+ {
+ synchronizer->enqueueWaitEvent(deviceStream_);
+ }
+
+ const bool addRvecForce = static_cast<bool>(rvecForceToAdd_); // True iff initialized
+
+ launchForceReductionKernel(numAtoms_,
+ atomStart_,
+ addRvecForce,
+ accumulate_,
+ nbnxmForceToAdd_,
+ rvecForceToAdd_,
+ baseForce_,
+ cellInfo_.d_cell,
+ deviceStream_);
}
-
- GMX_ASSERT(nbnxmForceToAdd_, "Nbnxm force for reduction has no data");
-
- // Enqueue wait on all dependencies passed
- for (auto* synchronizer : dependencyList_)
+ else
{
- synchronizer->enqueueWaitEvent(deviceStream_);
+ /* In case we have nothing to do, but still have dependencies, we need
+ * to consume them and mark our own event.
+ * Happens sometimes in MdrunVsitesTest.
+ * Issue #3988, #4227. */
+ for (auto* synchronizer : dependencyList_)
+ {
+ synchronizer->consume();
+ }
}
- const bool addRvecForce = static_cast<bool>(rvecForceToAdd_); // True iff initialized
-
- launchForceReductionKernel(numAtoms_,
- atomStart_,
- addRvecForce,
- accumulate_,
- nbnxmForceToAdd_,
- rvecForceToAdd_,
- baseForce_,
- cellInfo_.d_cell,
- deviceStream_);
-
- // Mark that kernel has been launched
+ /* Mark that kernel has been launched.
+ * Even if we have no work to do and have not launched the kernel, we still mark the event
+ * in order to ensure proper marking/consumption balance, see Issue #3988, #4227. */
if (completionMarker_ != nullptr)
{
completionMarker_->markEvent(deviceStream_);
wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
}
-GpuForceReduction::Impl::~Impl() = default;
-
GpuForceReduction::GpuForceReduction(const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
gmx_wallcycle* wcycle) :
impl_->registerRvecForce(forcePtr);
}
// Public entry point for registering a dependency: forwards the synchronizer pointer
// unchanged to the implementation object held in impl_.
-void GpuForceReduction::addDependency(GpuEventSynchronizer* const dependency)
+void GpuForceReduction::addDependency(GpuEventSynchronizer* dependency)
{
impl_->addDependency(dependency);
}