... instead of raw device pointers and DeviceBuffer<float3>.
We try to use DeviceBuffer<Float3>, but in some places we have to use
DeviceVector<gmx::RVec>, until we can define FloatN types without
including any backend-specific headers. Currently, Float3 is defined as
gmx::RVec, so this should not cause any issues.
Also added some helper functions to convert RVec <-> Float3 <-> float3.
Preparation for #3932 and #3941.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
//! \brief Single GPU call timing event
using CommandEvent = void*;
+// Stubs for CPU-only build. Might be changed in #3312.
+//! \brief Empty stub for the generic device-side 2-wide float type (no GPU backend).
+struct Float2
+{
+};
+//! \brief Empty stub for the generic device-side 3-wide float type (no GPU backend).
+struct Float3
+{
+};
+//! \brief Empty stub for the generic device-side 4-wide float type (no GPU backend).
+struct Float4
+{
+};
+
#endif // GMX_GPU
+namespace gmx
+{
+/*! \brief Reinterpret a host-side pointer as a pointer to the generic Float3 type.
+ *
+ * \tparam T Host-side element type; must have the same size as Float3 (checked at compile time).
+ * \param[in] in Pointer to cast.
+ * \returns The same address, viewed as Float3*.
+ */
+template<typename T>
+static inline Float3* asGenericFloat3Pointer(T* in)
+{
+ static_assert(sizeof(T) == sizeof(Float3),
+ "Size of the host-side data-type is different from the size of the generic "
+ "device-side counterpart.");
+ return reinterpret_cast<Float3*>(in);
+}
+
+//! \brief Const overload: reinterpret a const host-side pointer as const Float3*.
+template<typename T>
+static inline const Float3* asGenericFloat3Pointer(const T* in)
+{
+ static_assert(sizeof(T) == sizeof(Float3),
+ "Size of the host-side data-type is different from the size of the generic "
+ "device-side counterpart.");
+ return reinterpret_cast<const Float3*>(in);
+}
+
+/*! \brief Reinterpret the contiguous data of a container (anything with .data(),
+ * e.g. a vector of RVec) as a Float3 pointer; element size is checked at compile time.
+ */
+template<typename C>
+static inline Float3* asGenericFloat3Pointer(C& in)
+{
+ static_assert(sizeof(*in.data()) == sizeof(Float3),
+ "Size of the host-side data-type is different from the size of the device-side "
+ "counterpart.");
+ return reinterpret_cast<Float3*>(in.data());
+}
+
+//! \brief Const overload: reinterpret the data of a const container as const Float3*.
+template<typename C>
+static inline const Float3* asGenericFloat3Pointer(const C& in)
+{
+ static_assert(sizeof(*in.data()) == sizeof(Float3),
+ "Size of the host-side data-type is different from the size of the device-side "
+ "counterpart.");
+ return reinterpret_cast<const Float3*>(in.data());
+}
+} // namespace gmx
+
#endif // GMX_GPU_UTILS_GPUTRAITS_H
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2020, by the GROMACS development team, led by
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
return reinterpret_cast<float3*>(in);
}
+/*! \brief Cast a pointer to an RVec buffer pointer to a pointer to a float3 buffer pointer.
+ *
+ * \param[in] in Pointer to the RVec buffer pointer to cast.
+ *
+ * \returns The same address, cast to float3**.
+ */
+static inline __host__ __device__ float3** asFloat3Pointer(gmx::RVec** in)
+{
+ static_assert(sizeof((*in)[0]) == sizeof(float3),
+ "Size of the host-side data-type is different from the size of the device-side "
+ "counterpart.");
+ return reinterpret_cast<float3**>(in);
+}
+/*! \brief Const overload: cast a pointer to a const RVec buffer pointer to float3 form.
+ *
+ * \param[in] in Pointer to the const RVec buffer pointer to cast.
+ *
+ * \returns The same address, cast to const float3* const*.
+ */
+static inline __host__ __device__ const float3* const* asFloat3Pointer(const gmx::RVec* const* in)
+{
+ static_assert(sizeof((*in)[0]) == sizeof(float3),
+ "Size of the host-side data-type is different from the size of the device-side "
+ "counterpart.");
+ return reinterpret_cast<const float3* const*>(in);
+}
+
#endif // GMX_GPU_UTILS_TYPECASTS_CUH
*
* \param [in] forcePtr Pointer to force to be reduced
*/
- void registerNbnxmForce(void* forcePtr);
+ void registerNbnxmForce(DeviceBuffer<RVec> forcePtr);
/*! \brief Register a rvec-format force to be reduced
*
}
// NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::registerNbnxmForce(void* /* forcePtr */)
+//! \brief CPU-only stub; reaching it means the GPU implementation should have been called.
+void GpuForceReduction::registerNbnxmForce(DeviceBuffer<RVec> /* forcePtr */)
{
GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
}
{
}
-void GpuForceReduction::registerNbnxmForce(void* forcePtr)
+//! \brief Forwards the nbnxm force-buffer registration to the implementation class (pimpl).
+void GpuForceReduction::registerNbnxmForce(DeviceBuffer<Float3> forcePtr)
{
- impl_->registerNbnxmForce(reinterpret_cast<DeviceBuffer<RVec>>(forcePtr));
+ impl_->registerNbnxmForce(forcePtr);
}
void GpuForceReduction::registerRvecForce(void* forcePtr)
#include "gromacs/gpu_utils/device_stream.h"
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
+#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/math/vectypes.h"
#include "gpuforcereduction.h"
*
* \param [in] forcePtr Pointer to force to be reduced
*/
- void registerNbnxmForce(DeviceBuffer<RVec> forcePtr);
+ void registerNbnxmForce(DeviceBuffer<Float3> forcePtr);
/*! \brief Register a rvec-format force to be reduced
*
* \param [in] forcePtr Pointer to force to be reduced
*/
- void registerRvecForce(DeviceBuffer<RVec> forcePtr);
+ void registerRvecForce(DeviceBuffer<Float3> forcePtr);
/*! \brief Add a dependency for this force reduction
*
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/gpu_utils/devicebuffer.h"
+#include "gromacs/gpu_utils/typecasts.cuh"
#include "gromacs/gpu_utils/vectype_ops.cuh"
#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/group.h"
return kernelPtr;
}
-void LeapFrogGpu::integrate(const DeviceBuffer<float3> d_x,
- DeviceBuffer<float3> d_xp,
- DeviceBuffer<float3> d_v,
- const DeviceBuffer<float3> d_f,
+void LeapFrogGpu::integrate(DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ DeviceBuffer<Float3> d_v,
+ const DeviceBuffer<Float3> d_f,
const real dt,
const bool doTemperatureScaling,
gmx::ArrayRef<const t_grp_tcstat> tcstat,
"Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
"in GPU version of Leap-Frog integrator.");
prVelocityScalingMatrixDiagonal_ =
- make_float3(dtPressureCouple * prVelocityScalingMatrix[XX][XX],
- dtPressureCouple * prVelocityScalingMatrix[YY][YY],
- dtPressureCouple * prVelocityScalingMatrix[ZZ][ZZ]);
+ Float3{ dtPressureCouple * prVelocityScalingMatrix[XX][XX],
+ dtPressureCouple * prVelocityScalingMatrix[YY][YY],
+ dtPressureCouple * prVelocityScalingMatrix[ZZ][ZZ] };
}
kernelPtr = selectLeapFrogKernelPtr(doTemperatureScaling, numTempScaleValues_, prVelocityScalingType);
}
+ // Checking the buffer types against the kernel argument types
+ static_assert(sizeof(*d_inverseMasses_) == sizeof(float));
const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
kernelLaunchConfig_,
&numAtoms_,
- &d_x,
- &d_xp,
- &d_v,
- &d_f,
+ asFloat3Pointer(&d_x),
+ asFloat3Pointer(&d_xp),
+ asFloat3Pointer(&d_v),
+ asFloat3Pointer(&d_f),
&d_inverseMasses_,
&dt,
&d_lambdas_,
reallocateDeviceBuffer(
&d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, deviceContext_);
copyToDeviceBuffer(
- &d_inverseMasses_, (float*)inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
+ &d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
// Temperature scale group map only used if there are more then one group
if (numTempScaleValues_ > 1)
#include "config.h"
#if GMX_GPU_CUDA
-# include "gromacs/gpu_utils/devicebuffer.cuh"
# include "gromacs/gpu_utils/gputraits.cuh"
#endif
#if GMX_GPU_SYCL
-# include "gromacs/gpu_utils/devicebuffer_sycl.h"
# include "gromacs/gpu_utils/gputraits_sycl.h"
-using float3 = Float3;
#endif
#include <memory>
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/hostallocator.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pbcutil/pbc_aiuc.h"
#include "gromacs/utility/arrayref.h"
class DeviceContext;
* \param[in] dtPressureCouple Period between pressure coupling steps
* \param[in] prVelocityScalingMatrix Parrinello-Rahman velocity scaling matrix
*/
- void integrate(const DeviceBuffer<float3> d_x,
- DeviceBuffer<float3> d_xp,
- DeviceBuffer<float3> d_v,
- const DeviceBuffer<float3> d_f,
+ void integrate(DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ DeviceBuffer<Float3> d_v,
+ const DeviceBuffer<Float3> d_f,
const real dt,
const bool doTemperatureScaling,
gmx::ArrayRef<const t_grp_tcstat> tcstat,
int numTempScaleGroupsAlloc_ = -1;
//! Vector with diagonal elements of the Parrinello-Rahman pressure coupling velocity rescale factors
- float3 prVelocityScalingMatrixDiagonal_;
+ Float3 prVelocityScalingMatrixDiagonal_;
};
} // namespace gmx
template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
auto leapFrogKernel(
cl::sycl::handler& cgh,
- DeviceAccessor<float3, mode::read_write> a_x,
- DeviceAccessor<float3, mode::discard_write> a_xp,
- DeviceAccessor<float3, mode::read_write> a_v,
- DeviceAccessor<float3, mode::read> a_f,
+ DeviceAccessor<Float3, mode::read_write> a_x,
+ DeviceAccessor<Float3, mode::discard_write> a_xp,
+ DeviceAccessor<Float3, mode::read_write> a_v,
+ DeviceAccessor<Float3, mode::read> a_f,
DeviceAccessor<float, mode::read> a_inverseMasses,
float dt,
OptionalAccessor<float, mode::read, numTempScaleValues != NumTempScaleValues::None> a_lambdas,
OptionalAccessor<unsigned short, mode::read, numTempScaleValues == NumTempScaleValues::Multiple> a_tempScaleGroups,
- float3 prVelocityScalingMatrixDiagonal)
+ Float3 prVelocityScalingMatrixDiagonal)
{
cgh.require(a_x);
cgh.require(a_xp);
}
return [=](cl::sycl::id<1> itemIdx) {
- const float3 x = a_x[itemIdx];
- const float3 v = a_v[itemIdx];
- const float3 f = a_f[itemIdx];
+ const Float3 x = a_x[itemIdx];
+ const Float3 v = a_v[itemIdx];
+ const Float3 f = a_f[itemIdx];
const float im = a_inverseMasses[itemIdx];
const float imdt = im * dt;
}
}();
- const float3 prVelocityDelta = [=]() {
+ const Float3 prVelocityDelta = [=]() {
if constexpr (velocityScaling == VelocityScalingType::Diagonal)
{
- return float3{ prVelocityScalingMatrixDiagonal[0] * v[0],
+ return Float3{ prVelocityScalingMatrixDiagonal[0] * v[0],
prVelocityScalingMatrixDiagonal[1] * v[1],
prVelocityScalingMatrixDiagonal[2] * v[2] };
}
else if constexpr (velocityScaling == VelocityScalingType::None)
{
- return float3{ 0, 0, 0 };
+ return Float3{ 0, 0, 0 };
}
}();
- const float3 v_new = v * lambda - prVelocityDelta + f * imdt;
+ const Float3 v_new = v * lambda - prVelocityDelta + f * imdt;
a_v[itemIdx] = v_new;
a_x[itemIdx] = x + v_new * dt;
};
prVelocityScalingType);
}
-void LeapFrogGpu::integrate(DeviceBuffer<float3> d_x,
- DeviceBuffer<float3> d_xp,
- DeviceBuffer<float3> d_v,
- DeviceBuffer<float3> d_f,
+void LeapFrogGpu::integrate(DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ DeviceBuffer<Float3> d_v,
+ DeviceBuffer<Float3> d_f,
const real dt,
const bool doTemperatureScaling,
gmx::ArrayRef<const t_grp_tcstat> tcstat,
"Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
"in GPU version of Leap-Frog integrator.");
prVelocityScalingMatrixDiagonal_ = dtPressureCouple
- * float3{ prVelocityScalingMatrix[XX][XX],
+ * Float3{ prVelocityScalingMatrix[XX][XX],
prVelocityScalingMatrix[YY][YY],
prVelocityScalingMatrix[ZZ][ZZ] };
}
#include "gromacs/gpu_utils/cuda_arch_utils.cuh"
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/gpu_utils/devicebuffer.cuh"
-#include "gromacs/gpu_utils/gputraits.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
+#include "gromacs/gpu_utils/typecasts.cuh"
#include "gromacs/gpu_utils/vectype_ops.cuh"
#include "gromacs/math/functions.h"
#include "gromacs/math/vec.h"
return kernelPtr;
}
-void LincsGpu::apply(const float3* d_x,
- float3* d_xp,
- const bool updateVelocities,
- float3* d_v,
- const real invdt,
- const bool computeVirial,
- tensor virialScaled,
- const PbcAiuc pbcAiuc)
+void LincsGpu::apply(const DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ const bool updateVelocities,
+ DeviceBuffer<Float3> d_v,
+ const real invdt,
+ const bool computeVirial,
+ tensor virialScaled,
+ const PbcAiuc pbcAiuc)
{
ensureNoPendingDeviceError("In CUDA version of LINCS");
kernelParams_.pbcAiuc = pbcAiuc;
- const auto kernelArgs =
- prepareGpuKernelArguments(kernelPtr, config, &kernelParams_, &d_x, &d_xp, &d_v, &invdt);
+ const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
+ config,
+ &kernelParams_,
+ asFloat3Pointer(&d_x),
+ asFloat3Pointer(&d_xp),
+ asFloat3Pointer(&d_v),
+ &invdt);
launchGpuKernel(kernelPtr,
config,
#include <memory>
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/device_stream.h"
#include "gromacs/gpu_utils/gputraits.cuh"
* \param[in,out] virialScaled Scaled virial tensor to be updated.
* \param[in] pbcAiuc PBC data.
*/
- void apply(const float3* d_x,
- float3* d_xp,
- const bool updateVelocities,
- float3* d_v,
- const real invdt,
- const bool computeVirial,
- tensor virialScaled,
- const PbcAiuc pbcAiuc);
+ void apply(const DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ const bool updateVelocities,
+ DeviceBuffer<Float3> d_v,
+ const real invdt,
+ const bool computeVirial,
+ tensor virialScaled,
+ const PbcAiuc pbcAiuc);
/*! \brief
* Update data-structures (e.g. after NB search step).
#include "gromacs/gpu_utils/cuda_arch_utils.cuh"
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/gpu_utils/devicebuffer.h"
-#include "gromacs/gpu_utils/gputraits.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
+#include "gromacs/gpu_utils/typecasts.cuh"
#include "gromacs/gpu_utils/vectype_ops.cuh"
#include "gromacs/math/functions.h"
#include "gromacs/math/vec.h"
return kernelPtr;
}
-void SettleGpu::apply(const float3* d_x,
- float3* d_xp,
- const bool updateVelocities,
- float3* d_v,
- const real invdt,
- const bool computeVirial,
- tensor virialScaled,
- const PbcAiuc pbcAiuc)
+void SettleGpu::apply(const DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ const bool updateVelocities,
+ DeviceBuffer<Float3> d_v,
+ const real invdt,
+ const bool computeVirial,
+ tensor virialScaled,
+ const PbcAiuc pbcAiuc)
{
ensureNoPendingDeviceError("In CUDA version SETTLE");
config.sharedMemorySize = 0;
}
- const auto kernelArgs = prepareGpuKernelArguments(
- kernelPtr, config, &numSettles_, &d_atomIds_, &settleParameters_, &d_x, &d_xp, &invdt, &d_v, &d_virialScaled_, &pbcAiuc);
+ const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
+ config,
+ &numSettles_,
+ &d_atomIds_,
+ &settleParameters_,
+ asFloat3Pointer(&d_x),
+ asFloat3Pointer(&d_xp),
+ &invdt,
+ asFloat3Pointer(&d_v),
+ &d_virialScaled_,
+ &pbcAiuc);
launchGpuKernel(kernelPtr,
config,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gmxpre.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/device_context.h"
#include "gromacs/gpu_utils/device_stream.h"
-#include "gromacs/gpu_utils/gputraits.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/invertmatrix.h"
#include "gromacs/math/vec.h"
* \param[in,out] virialScaled Scaled virial tensor to be updated.
* \param[in] pbcAiuc PBC data.
*/
- void apply(const float3* d_x,
- float3* d_xp,
- const bool updateVelocities,
- float3* d_v,
- const real invdt,
- const bool computeVirial,
- tensor virialScaled,
- const PbcAiuc pbcAiuc);
+ void apply(const DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ const bool updateVelocities,
+ DeviceBuffer<Float3> d_v,
+ const real invdt,
+ const bool computeVirial,
+ tensor virialScaled,
+ const PbcAiuc pbcAiuc);
/*! \brief
* Update data-structures (e.g. after NB search step).
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <vector>
#include "gromacs/gpu_utils/devicebuffer.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/hardware/device_information.h"
#include "gromacs/mdlib/lincs_gpu.cuh"
#include "gromacs/pbcutil/pbc.h"
auto lincsGpu = std::make_unique<LincsGpu>(
testData->ir_.nLincsIter, testData->ir_.nProjOrder, deviceContext, deviceStream);
- bool updateVelocities = true;
- int numAtoms = testData->numAtoms_;
- float3 *d_x, *d_xp, *d_v;
+ bool updateVelocities = true;
+ int numAtoms = testData->numAtoms_;
+
+ Float3* h_x = gmx::asGenericFloat3Pointer(testData->x_);
+ Float3* h_xp = gmx::asGenericFloat3Pointer(testData->xPrime_);
+ Float3* h_v = gmx::asGenericFloat3Pointer(testData->v_);
+
+ DeviceBuffer<Float3> d_x, d_xp, d_v;
lincsGpu->set(*testData->idef_, testData->numAtoms_, testData->invmass_.data());
PbcAiuc pbcAiuc;
allocateDeviceBuffer(&d_xp, numAtoms, deviceContext);
allocateDeviceBuffer(&d_v, numAtoms, deviceContext);
- copyToDeviceBuffer(
- &d_x, (float3*)(testData->x_.data()), 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
- copyToDeviceBuffer(
- &d_xp, (float3*)(testData->xPrime_.data()), 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyToDeviceBuffer(&d_x, h_x, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyToDeviceBuffer(&d_xp, h_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
if (updateVelocities)
{
- copyToDeviceBuffer(
- &d_v, (float3*)(testData->v_.data()), 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyToDeviceBuffer(&d_v, h_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
}
lincsGpu->apply(
d_x, d_xp, updateVelocities, d_v, testData->invdt_, testData->computeVirial_, testData->virialScaled_, pbcAiuc);
- copyFromDeviceBuffer(
- (float3*)(testData->xPrime_.data()), &d_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyFromDeviceBuffer(h_xp, &d_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
if (updateVelocities)
{
- copyFromDeviceBuffer(
- (float3*)(testData->v_.data()), &d_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyFromDeviceBuffer(h_v, &d_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
}
freeDeviceBuffer(&d_x);
# include "gromacs/mdlib/leapfrog_gpu.h"
#endif
+#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/hardware/device_information.h"
#include "gromacs/mdlib/stat.h"
int numAtoms = testData->numAtoms_;
- static_assert(sizeof(float3) == sizeof(*testData->x_.data()), "Incompatible types");
+ Float3* h_x = gmx::asGenericFloat3Pointer(testData->x_);
+ Float3* h_xp = gmx::asGenericFloat3Pointer(testData->xPrime_);
+ Float3* h_v = gmx::asGenericFloat3Pointer(testData->v_);
+ Float3* h_f = gmx::asGenericFloat3Pointer(testData->f_);
- float3* h_x = reinterpret_cast<float3*>(testData->x_.data());
- float3* h_xp = reinterpret_cast<float3*>(testData->xPrime_.data());
- float3* h_v = reinterpret_cast<float3*>(testData->v_.data());
- float3* h_f = reinterpret_cast<float3*>(testData->f_.data());
-
- DeviceBuffer<float3> d_x, d_xp, d_v, d_f;
+ DeviceBuffer<Float3> d_x, d_xp, d_v, d_f;
allocateDeviceBuffer(&d_x, numAtoms, deviceContext);
allocateDeviceBuffer(&d_xp, numAtoms, deviceContext);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <vector>
#include "gromacs/gpu_utils/devicebuffer.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/hardware/device_information.h"
#include "gromacs/mdlib/settle_gpu.cuh"
#include "gromacs/utility/unique_cptr.h"
int numAtoms = testData->numAtoms_;
- float3 *d_x, *d_xp, *d_v;
+ DeviceBuffer<Float3> d_x, d_xp, d_v;
- float3* h_x = (float3*)(as_rvec_array(testData->x_.data()));
- float3* h_xp = (float3*)(as_rvec_array(testData->xPrime_.data()));
- float3* h_v = (float3*)(as_rvec_array(testData->v_.data()));
+ Float3* h_x = gmx::asGenericFloat3Pointer(testData->x_);
+ Float3* h_xp = gmx::asGenericFloat3Pointer(testData->xPrime_);
+ Float3* h_v = gmx::asGenericFloat3Pointer(testData->v_);
allocateDeviceBuffer(&d_x, numAtoms, deviceContext);
allocateDeviceBuffer(&d_xp, numAtoms, deviceContext);
allocateDeviceBuffer(&d_v, numAtoms, deviceContext);
- copyToDeviceBuffer(&d_x, (float3*)h_x, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
- copyToDeviceBuffer(&d_xp, (float3*)h_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyToDeviceBuffer(&d_x, h_x, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyToDeviceBuffer(&d_xp, h_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
if (updateVelocities)
{
- copyToDeviceBuffer(&d_v, (float3*)h_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyToDeviceBuffer(&d_v, h_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
}
settleGpu->apply(
d_x, d_xp, updateVelocities, d_v, testData->reciprocalTimeStep_, calcVirial, testData->virial_, pbcAiuc);
- copyFromDeviceBuffer((float3*)h_xp, &d_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyFromDeviceBuffer(h_xp, &d_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
if (updateVelocities)
{
- copyFromDeviceBuffer(
- (float3*)h_v, &d_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+ copyFromDeviceBuffer(h_v, &d_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
}
freeDeviceBuffer(&d_x);
UpdateConstrainGpu::Impl::~Impl() {}
-void UpdateConstrainGpu::Impl::set(DeviceBuffer<RVec> d_x,
- DeviceBuffer<RVec> d_v,
- const DeviceBuffer<RVec> d_f,
+void UpdateConstrainGpu::Impl::set(DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_v,
+ const DeviceBuffer<Float3> d_f,
const InteractionDefinitions& idef,
const t_mdatoms& md)
{
GMX_ASSERT(d_v != nullptr, "Velocities device buffer should not be null.");
GMX_ASSERT(d_f != nullptr, "Forces device buffer should not be null.");
- d_x_ = reinterpret_cast<float3*>(d_x);
- d_v_ = reinterpret_cast<float3*>(d_v);
- d_f_ = reinterpret_cast<float3*>(d_f);
+ d_x_ = d_x;
+ d_v_ = d_v;
+ d_f_ = d_f;
numAtoms_ = md.nr;
impl_->scaleVelocities(scalingMatrix);
}
-void UpdateConstrainGpu::set(DeviceBuffer<RVec> d_x,
- DeviceBuffer<RVec> d_v,
- const DeviceBuffer<RVec> d_f,
+void UpdateConstrainGpu::set(DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_v,
+ const DeviceBuffer<Float3> d_f,
const InteractionDefinitions& idef,
const t_mdatoms& md)
{
* \param[in] idef System topology
* \param[in] md Atoms data.
*/
- void set(DeviceBuffer<RVec> d_x,
- DeviceBuffer<RVec> d_v,
- const DeviceBuffer<RVec> d_f,
+ void set(DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_v,
+ const DeviceBuffer<Float3> d_f,
const InteractionDefinitions& idef,
const t_mdatoms& md);
int numAtoms_;
//! Local copy of the pointer to the device positions buffer
- float3* d_x_;
+ DeviceBuffer<Float3> d_x_;
//! Local copy of the pointer to the device velocities buffer
- float3* d_v_;
+ DeviceBuffer<Float3> d_v_;
//! Local copy of the pointer to the device forces buffer
- float3* d_f_;
+ DeviceBuffer<Float3> d_f_;
//! Device buffer for intermediate positions (maintained internally)
- float3* d_xp_;
+ DeviceBuffer<Float3> d_xp_;
//! Number of elements in shifted coordinates buffer
int numXp_ = -1;
//! Allocation size for the shifted coordinates buffer
//! 1/mass for all atoms (GPU)
- real* d_inverseMasses_;
+ DeviceBuffer<real> d_inverseMasses_;
//! Number of elements in reciprocal masses buffer
int numInverseMasses_ = -1;
//! Allocation size for the reciprocal masses buffer
nbnxnInsertNonlocalGpuDependency(nb, interactionLoc);
}
-void* getGpuForces(NbnxmGpu* nb)
+//! \brief Returns the device buffer holding the nonbonded forces (nb->atdat->f).
+DeviceBuffer<Float3> getGpuForces(NbnxmGpu* nb)
{
return nb->atdat->f;
}
return numAtoms;
}
-void* nonbonded_verlet_t::getGpuForces() const
+//! \brief Returns the GPU force buffer by forwarding to Nbnxm::getGpuForces.
+// Note: returns DeviceBuffer<gmx::RVec> while the callee returns DeviceBuffer<Float3>;
+// per the commit message, Float3 is currently defined as gmx::RVec, so the types agree.
+DeviceBuffer<gmx::RVec> nonbonded_verlet_t::getGpuForces() const
{
return Nbnxm::getGpuForces(gpu_nbv);
}
*
* \returns A pointer to the force buffer in GPU memory
*/
- void* getGpuForces() const;
+ DeviceBuffer<gmx::RVec> getGpuForces() const;
//! Return the kernel setup
const Nbnxm::KernelSetup& kernelSetup() const { return kernelSetup_; }
* \returns A pointer to the force buffer in GPU memory
*/
CUDA_FUNC_QUALIFIER
-void* getGpuForces(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+DeviceBuffer<gmx::RVec> getGpuForces(NbnxmGpu gmx_unused* nb)
+ CUDA_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
} // namespace Nbnxm
#endif