// Clearing of the forces can be done in local stream since the nonlocal stream cannot reach
// the force accumulation stage before syncing with the local stream. Only done in CUDA and
// SYCL, since the force buffer ops are not implemented in OpenCL.
//
// The clear is enqueued exactly once: issuing the same asynchronous clear of d_f_ a second
// time on the same stream would only repeat identical work without changing the result.
{
    clearDeviceBufferAsync(&d_f_, 0, d_fCapacity_, *localStream_);
}