The D2D copy is currently only used in CUDA, so only stubs are added for OpenCL and SYCL.
Closes #3321
Refs #3318
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
}
+/*! \brief
+ * Performs the device-to-device data copy, synchronous or asynchronously on request.
+ *
+ * \tparam ValueType Raw value type of the \p buffer.
+ * \param[in,out] destinationDeviceBuffer Device-side buffer to copy to
+ * \param[in] sourceDeviceBuffer Device-side buffer to copy from
+ * \param[in] numValues Number of values to copy.
+ * \param[in] deviceStream GPU stream to perform asynchronous copy in.
+ * \param[in] transferKind Copy type: synchronous or asynchronous.
+ * \param[out] timingEvent A dummy pointer to the D2D copy timing event to be filled
+ * in. Not used in CUDA implementation.
+ */
+template<typename ValueType>
+void copyBetweenDeviceBuffers(DeviceBuffer<ValueType>* destinationDeviceBuffer,
+ DeviceBuffer<ValueType>* sourceDeviceBuffer,
+ size_t numValues,
+ const DeviceStream& deviceStream,
+ GpuApiCallBehavior transferKind,
+ CommandEvent* /*timingEvent*/)
+{
+ if (numValues == 0)
+ {
+ return;
+ }
+ GMX_ASSERT(destinationDeviceBuffer, "needs a destination buffer pointer");
+ GMX_ASSERT(sourceDeviceBuffer, "needs a source buffer pointer");
+
+ cudaError_t stat;
+ const size_t bytes = numValues * sizeof(ValueType);
+ switch (transferKind)
+ {
+ case GpuApiCallBehavior::Async:
+ stat = cudaMemcpyAsync(*destinationDeviceBuffer,
+ *sourceDeviceBuffer,
+ bytes,
+ cudaMemcpyDeviceToDevice,
+ deviceStream.stream());
+ GMX_RELEASE_ASSERT(
+ stat == cudaSuccess,
+ ("Asynchronous D2D copy failed. " + gmx::getDeviceErrorString(stat)).c_str());
+ break;
+
+ case GpuApiCallBehavior::Sync:
+ stat = cudaMemcpy(*destinationDeviceBuffer, *sourceDeviceBuffer, bytes, cudaMemcpyDeviceToDevice);
+ GMX_RELEASE_ASSERT(
+ stat == cudaSuccess,
+ ("Synchronous D2D copy failed. " + gmx::getDeviceErrorString(stat)).c_str());
+ break;
+
+ default: throw;
+ }
+}
+
/*! \brief
* Clears the device buffer asynchronously.
*
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
}
+/*! \brief
+ * Performs the device-to-device data copy, synchronous or asynchronously on request.
+ *
+ * \tparam ValueType Raw value type of the \p buffer.
+ */
+template<typename ValueType>
+void copyBetweenDeviceBuffers(DeviceBuffer<ValueType>* /* destinationDeviceBuffer */,
+ DeviceBuffer<ValueType>* /* sourceDeviceBuffer */,
+ size_t /* numValues */,
+ const DeviceStream& /* deviceStream */,
+ GpuApiCallBehavior /* transferKind */,
+ CommandEvent* /*timingEvent*/)
+{
+ // OpenCL-TODO
+ gmx_fatal(FARGS, "D2D copy stub was called. Not yet implemented in OpenCL.");
+}
+
/*! \brief
* Clears the device buffer asynchronously.
*
#include "gromacs/gpu_utils/gmxsycl.h"
#include "gromacs/gpu_utils/gpu_utils.h" //only for GpuApiCallBehavior
#include "gromacs/gpu_utils/gputraits_sycl.h"
+#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
}
}
+/*! \brief
+ * Performs the device-to-device data copy, synchronous or asynchronously on request.
+ *
+ * \tparam ValueType Raw value type of the \p buffer.
+ */
+template<typename ValueType>
+void copyBetweenDeviceBuffers(DeviceBuffer<ValueType>* /* destinationDeviceBuffer */,
+ DeviceBuffer<ValueType>* /* sourceDeviceBuffer */,
+ size_t /* numValues */,
+ const DeviceStream& /* deviceStream */,
+ GpuApiCallBehavior /* transferKind */,
+ CommandEvent* /*timingEvent*/)
+{
+ // SYCL-TODO
+ gmx_fatal(FARGS, "D2D copy stub was called. Not yet implemented in SYCL.");
+}
+
namespace gmx::internal
{
}
}
+# if GMX_GPU_CUDA
+
+TYPED_TEST(DeviceBufferTest, CanCopyBetweenDeviceBuffers)
+{
+ for (auto transferKind : { GpuApiCallBehavior::Sync, GpuApiCallBehavior::Async })
+ {
+ PinningPolicy pinningPolicy = (transferKind == GpuApiCallBehavior::Async)
+ ? PinningPolicy::PinnedIfSupported
+ : PinningPolicy::CannotBePinned;
+ for (const auto& testDeviceIn : getTestHardwareEnvironment()->getTestDeviceList())
+ {
+ for (const auto& testDeviceOut : getTestHardwareEnvironment()->getTestDeviceList())
+ {
+ int numValues = 321;
+ HostVector<TypeParam> valuesIn(numValues, { pinningPolicy });
+ HostVector<TypeParam> valuesOut(numValues, { pinningPolicy });
+
+ std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue<TypeParam>);
+
+ const DeviceContext& deviceContextIn = testDeviceIn->deviceContext();
+ const DeviceStream& deviceStreamIn = testDeviceIn->deviceStream();
+ setActiveDevice(testDeviceIn->deviceInfo());
+ DeviceBuffer<TypeParam> bufferIn;
+ allocateDeviceBuffer(&bufferIn, numValues, deviceContextIn);
+
+ const DeviceContext& deviceContextOut = testDeviceOut->deviceContext();
+ const DeviceStream& deviceStreamOut = testDeviceOut->deviceStream();
+ setActiveDevice(testDeviceOut->deviceInfo());
+ DeviceBuffer<TypeParam> bufferOut;
+ allocateDeviceBuffer(&bufferOut, numValues, deviceContextOut);
+
+ copyToDeviceBuffer(
+ &bufferIn, valuesIn.data(), 0, numValues, deviceStreamIn, transferKind, nullptr);
+ copyBetweenDeviceBuffers(
+ &bufferOut, &bufferIn, numValues, deviceStreamIn, transferKind, nullptr);
+ if (transferKind == GpuApiCallBehavior::Async)
+ {
+ deviceStreamIn.synchronize();
+ }
+ copyFromDeviceBuffer(
+ valuesOut.data(), &bufferOut, 0, numValues, deviceStreamOut, transferKind, nullptr);
+ if (transferKind == GpuApiCallBehavior::Async)
+ {
+ deviceStreamOut.synchronize();
+ }
+ EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn))
+ << "Changed after H2D, D2D and D2H " << enumValueToString(transferKind)
+ << " copy.";
+ freeDeviceBuffer(&bufferIn);
+ freeDeviceBuffer(&bufferOut);
+ }
+ }
+ }
+}
+
+# endif // GMX_GPU_CUDA
+
+
} // namespace
} // namespace test
} // namespace gmx