/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*/
#include "gromacs/gpu_utils/gmxopencl.h"
+#include "gromacs/math/vectypes.h"
using DeviceTexture = void*;
//! \brief Single GPU call timing event
using CommandEvent = cl_event;
+//! Convenience alias for 2-wide float.
+using Float2 = cl_float2;
+
+//! Convenience alias for 3-wide float. Not using cl_float3 due to alignment issues.
+using Float3 = gmx::RVec;
+
+//! Convenience alias for 4-wide float.
+using Float4 = cl_float4;
+
/*! \internal \brief
* GPU kernels scheduling description. This is the same in OpenCL/CUDA.
* Provides reasonable defaults, one typically only needs to set the GPU stream
}
/* HtoD x, q */
- GMX_ASSERT(sizeof(float) == sizeof(*nbatom->x().data()),
- "The size of the xyzq buffer element should be equal to the size of float4.");
+ static_assert(sizeof(float) == sizeof(*nbatom->x().data()),
+ "The size of the xyzq buffer element should be equal to the size of float4.");
copyToDeviceBuffer(&adat->xq,
- nbatom->x().data() + adat_begin * 4,
- adat_begin * 4,
- adat_len * 4,
+ reinterpret_cast<const Float4*>(nbatom->x().data()) + adat_begin,
+ adat_begin,
+ adat_len,
deviceStream,
GpuApiCallBehavior::Async,
bDoTime ? t->xf[atomLocality].nb_h2d.fetchNextEvent() : nullptr);
/* DtoH f */
GMX_ASSERT(sizeof(*nbatom->out[0].f.data()) == sizeof(float),
"The host force buffer should be in single precision to match device data size.");
- copyFromDeviceBuffer(&nbatom->out[0].f[adat_begin * DIM],
+ copyFromDeviceBuffer(reinterpret_cast<Float3*>(nbatom->out[0].f.data()) + adat_begin,
&adat->f,
- adat_begin * DIM,
- adat_len * DIM,
+ adat_begin,
+ adat_len,
deviceStream,
GpuApiCallBehavior::Async,
bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
/* DtoH fshift when virial is needed */
if (stepWork.computeVirial)
{
- GMX_ASSERT(sizeof(*nb->nbst.fshift) == DIM * sizeof(float),
- "Sizes of host- and device-side shift vector elements should be the same.");
- copyFromDeviceBuffer(reinterpret_cast<float*>(nb->nbst.fshift),
+ static_assert(
+ sizeof(*nb->nbst.fshift) == sizeof(Float3),
+ "Sizes of host- and device-side shift vector elements should be the same.");
+ copyFromDeviceBuffer(nb->nbst.fshift,
&adat->fshift,
0,
- SHIFTS * DIM,
+ SHIFTS,
deviceStream,
GpuApiCallBehavior::Async,
bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
/* DtoH energies */
if (stepWork.computeEnergy)
{
- GMX_ASSERT(sizeof(*nb->nbst.e_lj) == sizeof(float),
- "Sizes of host- and device-side LJ energy terms should be the same.");
+ static_assert(sizeof(*nb->nbst.e_lj) == sizeof(float),
+ "Sizes of host- and device-side LJ energy terms should be the same.");
copyFromDeviceBuffer(nb->nbst.e_lj,
&adat->e_lj,
0,
deviceStream,
GpuApiCallBehavior::Async,
bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
- GMX_ASSERT(sizeof(*nb->nbst.e_el) == sizeof(float),
- "Sizes of host- and device-side electrostatic energy terms should be the "
- "same.");
+ static_assert(sizeof(*nb->nbst.e_el) == sizeof(float),
+ "Sizes of host- and device-side electrostatic energy terms should be the "
+ "same.");
copyFromDeviceBuffer(nb->nbst.e_el,
&adat->e_el,
0,
cl_atomdata_t* atomData = nb->atdat;
const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
- clearDeviceBufferAsync(&atomData->f, 0, natoms_clear * DIM, localStream);
+ clearDeviceBufferAsync(&atomData->f, 0, natoms_clear, localStream);
}
//! This function is documented in the header file
/* only if we have a dynamic box */
if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
{
- GMX_ASSERT(sizeof(float) * DIM == sizeof(*nbatom->shift_vec.data()),
- "Sizes of host- and device-side shift vectors should be the same.");
+ static_assert(sizeof(Float3) == sizeof(nbatom->shift_vec[0]),
+ "Sizes of host- and device-side shift vectors should be the same.");
copyToDeviceBuffer(&adat->shift_vec,
- reinterpret_cast<const float*>(nbatom->shift_vec.data()),
+ reinterpret_cast<const Float3*>(nbatom->shift_vec.data()),
0,
- SHIFTS * DIM,
+ SHIFTS,
localStream,
GpuApiCallBehavior::Async,
nullptr);
}
- allocateDeviceBuffer(&d_atdat->f, nalloc * DIM, deviceContext);
- allocateDeviceBuffer(&d_atdat->xq, nalloc * (DIM + 1), deviceContext);
+ allocateDeviceBuffer(&d_atdat->f, nalloc, deviceContext);
+ allocateDeviceBuffer(&d_atdat->xq, nalloc, deviceContext);
if (useLjCombRule(nb->nbparam->vdwType))
{
// Two Lennard-Jones parameters per atom
- allocateDeviceBuffer(&d_atdat->lj_comb, nalloc * 2, deviceContext);
+ allocateDeviceBuffer(&d_atdat->lj_comb, nalloc, deviceContext);
}
else
{
if (useLjCombRule(nb->nbparam->vdwType))
{
- GMX_ASSERT(sizeof(float) == sizeof(*nbat->params().lj_comb.data()),
- "Size of the LJ parameters element should be equal to the size of float2.");
+ static_assert(sizeof(float) == sizeof(*nbat->params().lj_comb.data()),
+ "Size of the LJ parameters element should be equal to the size of float2.");
copyToDeviceBuffer(&d_atdat->lj_comb,
- nbat->params().lj_comb.data(),
+ reinterpret_cast<const Float2*>(nbat->params().lj_comb.data()),
0,
- 2 * natoms,
+ natoms,
localStream,
GpuApiCallBehavior::Async,
bDoTime ? timers->atdat.fetchNextEvent() : nullptr);
}
else
{
- GMX_ASSERT(sizeof(int) == sizeof(*nbat->params().type.data()),
- "Sizes of host- and device-side atom types should be the same.");
+ static_assert(sizeof(int) == sizeof(*nbat->params().type.data()),
+ "Sizes of host- and device-side atom types should be the same.");
copyToDeviceBuffer(&d_atdat->atom_types,
nbat->params().type.data(),
0,
//! electrostatic energy
float* e_el = nullptr;
//! float3 buffer with shift forces
- float (*fshift)[3] = nullptr;
+ Float3* fshift = nullptr;
};
/*! \internal
int nalloc;
//! float4 buffer with atom coordinates + charges, size natoms
- DeviceBuffer<float> xq;
+ DeviceBuffer<Float4> xq;
//! float3 buffer with force output array, size natoms
- DeviceBuffer<float> f;
+ DeviceBuffer<Float3> f;
//! LJ energy output, size 1
DeviceBuffer<float> e_lj;
DeviceBuffer<float> e_el;
//! float3 buffer with shift forces
- DeviceBuffer<float> fshift;
+ DeviceBuffer<Float3> fshift;
//! number of atom types
int ntypes;
//! int buffer with atom type indices, size natoms
DeviceBuffer<int> atom_types;
//! float2 buffer with sqrt(c6),sqrt(c12), size natoms
- DeviceBuffer<float> lj_comb;
+ DeviceBuffer<Float2> lj_comb;
//! float3 buffer with shift values
- DeviceBuffer<float> shift_vec;
+ DeviceBuffer<Float3> shift_vec;
//! true if the shift vector has been uploaded
bool bShiftVecUploaded;