#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/math/vectypes.h"
+#include "gromacs/pbcutil/pbc.h"
#include "gromacs/topology/idef.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/classhelpers.h"
struct gmx_enerdata_t;
struct gmx_ffparams_t;
struct gmx_mtop_t;
-struct t_forcerec;
struct t_inputrec;
struct gmx_wallcycle;
class GpuBonded
{
public:
- //! Construct the manager with constant data and the stream to use.
+ /*! \brief Construct the manager with constant data and the stream to use.
+ *
+ * \param[in] ffparams Force-field parameters.
+ * \param[in] electrostaticsScaleFactor Scaling factor for the electrostatic potential
+ * (Coulomb constant, multiplied by the Fudge factor).
+ * \param[in] deviceContext GPU device context (not used in CUDA).
+ * \param[in] deviceStream GPU device stream.
+ * \param[in] wcycle The wallclock counter.
+ *
+ */
GpuBonded(const gmx_ffparams_t& ffparams,
+ const float electrostaticsScaleFactor,
const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
gmx_wallcycle* wcycle);
* Intended to be called after each neighbour search
* stage. Copies the bonded interactions assigned to the GPU
* to device data structures, and updates device buffers that
- * may have been updated after search. */
+ * may have been updated after search.
+ *
+ * \param[in] nbnxnAtomOrder Mapping between rvec and NBNXM formats.
+ * \param[in] idef List of interactions to compute.
+ * \param[in] xqDevice Device buffer with coordinates and charge in xyzq-format.
+ * \param[in,out] forceDevice Device buffer with forces.
+ * \param[in,out] fshiftDevice Device buffer with shift forces.
+ */
void updateInteractionListsAndDeviceBuffers(ArrayRef<const int> nbnxnAtomOrder,
const InteractionDefinitions& idef,
void* xqDevice,
DeviceBuffer<RVec> forceDevice,
DeviceBuffer<RVec> fshiftDevice);
+ /*! \brief
+ * Update PBC data.
+ *
+ * Converts the PBC type and box into the PbcAiuc format and stores the latter.
+ *
+ * \param[in] pbcType The type of the periodic boundary.
+ * \param[in] box The periodic boundary box matrix.
+ * \param[in] canMoleculeSpanPbc Whether one molecule can have atoms in different PBC cells.
+ */
+ void setPbc(PbcType pbcType, const matrix box, bool canMoleculeSpanPbc);
/*! \brief Returns whether there are bonded interactions
- * assigned to the GPU */
+ * assigned to the GPU
+ *
+ * \returns Whether the list of interactions has any elements.
+ */
bool haveInteractions() const;
- /*! \brief Launches bonded kernel on a GPU */
- void launchKernel(const t_forcerec* fr, const gmx::StepWorkload& stepWork, const matrix box);
- /*! \brief Launches the transfer of computed bonded energies. */
+
+ /*! \brief Launches bonded kernel on a GPU
+ *
+ * \param[in] stepWork Simulation step work to determine if energy/virial are to be computed on this step.
+ */
+ void launchKernel(const gmx::StepWorkload& stepWork);
+
+ /*! \brief Sets the PBC and launches bonded kernel on a GPU
+ *
+ * \param[in] pbcType The type of the periodic boundary.
+ * \param[in] box The periodic boundary box matrix.
+ * \param[in] canMoleculeSpanPbc Whether one molecule can have atoms in different PBC cells.
+ * \param[in] stepWork Simulation step work to determine if energy/virial are to be computed on this step.
+ */
+ void setPbcAndlaunchKernel(PbcType pbcType,
+ const matrix box,
+ bool canMoleculeSpanPbc,
+ const gmx::StepWorkload& stepWork);
+
+ /*! \brief Launches the transfer of computed bonded energies.
+ */
void launchEnergyTransfer();
- /*! \brief Waits on the energy transfer, and accumulates bonded energies to \c enerd. */
+
+ /*! \brief Waits on the energy transfer, and accumulates bonded energies to \c enerd.
+ *
+ * \param[in,out] enerd The energy data object to add energy terms to.
+ */
void waitAccumulateEnergyTerms(gmx_enerdata_t* enerd);
- /*! \brief Clears the device side energy buffer */
+
+ /*! \brief Clears the device side energy buffer
+ */
void clearEnergies();
private:
};
GpuBonded::GpuBonded(const gmx_ffparams_t& /* ffparams */,
+ const float /* electrostaticsScaleFactor */,
const DeviceContext& /* deviceContext */,
const DeviceStream& /* deviceStream */,
gmx_wallcycle* /* wcycle */) :
{
}
+void GpuBonded::setPbc(PbcType /* pbcType */, const matrix /* box */, bool /* canMoleculeSpanPbc */)
+{ // Empty body: none of the (unnamed) parameters is used in this definition.
+}
+
// Returns the logical negation of impl_, i.e. true exactly when impl_ tests false.
// NOTE(review): the class documentation describes this query as "whether there are
// bonded interactions assigned to the GPU"; confirm that reporting true for an
// unset impl_ is what this variant intends.
bool GpuBonded::haveInteractions() const
{
return !impl_;
}
-void GpuBonded::launchKernel(const t_forcerec* /* fr */,
- const gmx::StepWorkload& /* stepWork */,
- const matrix /* box */)
+void GpuBonded::launchKernel(const gmx::StepWorkload& /* stepWork */) {} // Empty body: stepWork is unused.
+
+void GpuBonded::setPbcAndlaunchKernel(PbcType /* pbcType */,
+ const matrix /* box */,
+ bool /* canMoleculeSpanPbc */,
+ const gmx::StepWorkload& /* stepWork */)
{ // Empty body: no parameter is used, so no PBC data is set and no kernel is launched.
}
namespace gmx
{
+// Number of CUDA threads in a block
+constexpr static int c_threadsPerBlock = 256;
// ---- GpuBonded::Impl
GpuBonded::Impl::Impl(const gmx_ffparams_t& ffparams,
+ const float electrostaticsScaleFactor,
const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
gmx_wallcycle* wcycle) :
GMX_RELEASE_ASSERT(deviceStream.isValid(),
"Can't run GPU version of bonded forces in stream that is not valid.");
+ static_assert(c_threadsPerBlock >= SHIFTS,
+ "Threads per block in GPU bonded must be >= SHIFTS for the virial kernel "
+ "(calcVir=true)");
+
wcycle_ = wcycle;
allocateDeviceBuffer(&d_forceParams_, ffparams.numTypes(), deviceContext_);
allocateDeviceBuffer(&d_vTot_, F_NRE, deviceContext_);
clearDeviceBufferAsync(&d_vTot_, 0, F_NRE, deviceStream_);
- kernelParams_.d_forceParams = d_forceParams_;
- kernelParams_.d_xq = d_xq_;
- kernelParams_.d_f = d_f_;
- kernelParams_.d_fShift = d_fShift_;
- kernelParams_.d_vTot = d_vTot_;
+ kernelParams_.electrostaticsScaleFactor = electrostaticsScaleFactor;
+ kernelParams_.d_forceParams = d_forceParams_;
+ kernelParams_.d_xq = d_xq_;
+ kernelParams_.d_f = d_f_;
+ kernelParams_.d_fShift = d_fShift_;
+ kernelParams_.d_vTot = d_vTot_;
for (int i = 0; i < numFTypesOnGpu; i++)
{
kernelParams_.d_iatoms[i] = nullptr;
kernelParams_.fTypeRangeStart[i] = 0;
kernelParams_.fTypeRangeEnd[i] = -1;
}
+
+ int fTypeRangeEnd = kernelParams_.fTypeRangeEnd[numFTypesOnGpu - 1];
+
+ kernelLaunchConfig_.blockSize[0] = c_threadsPerBlock;
+ kernelLaunchConfig_.blockSize[1] = 1;
+ kernelLaunchConfig_.blockSize[2] = 1;
+ kernelLaunchConfig_.gridSize[0] = (fTypeRangeEnd + c_threadsPerBlock) / c_threadsPerBlock;
+ kernelLaunchConfig_.gridSize[1] = 1;
+ kernelLaunchConfig_.gridSize[2] = 1;
}
GpuBonded::Impl::~Impl()
fTypesCounter++;
}
+ int fTypeRangeEnd = kernelParams_.fTypeRangeEnd[numFTypesOnGpu - 1];
+ kernelLaunchConfig_.gridSize[0] = (fTypeRangeEnd + c_threadsPerBlock) / c_threadsPerBlock;
+
d_xq_ = static_cast<float4*>(d_xqPtr);
d_f_ = asFloat3(d_fPtr);
d_fShift_ = asFloat3(d_fShiftPtr);
// TODO wallcycle sub stop
}
+void GpuBonded::Impl::setPbc(PbcType pbcType, const matrix box, bool canMoleculeSpanPbc)
+{
+ PbcAiuc pbcAiuc;
+ setPbcAiuc(canMoleculeSpanPbc ? numPbcDimensions(pbcType) : 0, box, &pbcAiuc); // First argument is 0 when molecules cannot span PBC cells.
+ kernelParams_.pbcAiuc = pbcAiuc; // Keep the result in the parameters handed to the kernel at launch.
+}
+
bool GpuBonded::Impl::haveInteractions() const
{
return haveInteractions_;
// ---- GpuBonded
GpuBonded::GpuBonded(const gmx_ffparams_t& ffparams,
+ const float electrostaticsScaleFactor,
const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
gmx_wallcycle* wcycle) :
- impl_(new Impl(ffparams, deviceContext, deviceStream, wcycle))
+ impl_(new Impl(ffparams, electrostaticsScaleFactor, deviceContext, deviceStream, wcycle)) // All arguments are forwarded unchanged to Impl.
{
}
impl_->updateInteractionListsAndDeviceBuffers(nbnxnAtomOrder, idef, d_xq, d_f, d_fShift);
}
+void GpuBonded::setPbc(PbcType pbcType, const matrix box, bool canMoleculeSpanPbc)
+{
+ impl_->setPbc(pbcType, box, canMoleculeSpanPbc); // Pure forwarding to the implementation object.
+}
+
// Forwards the query to the implementation object and returns its answer unchanged.
bool GpuBonded::haveInteractions() const
{
return impl_->haveInteractions();
}
+void GpuBonded::setPbcAndlaunchKernel(PbcType pbcType,
+ const matrix box,
+ bool canMoleculeSpanPbc,
+ const gmx::StepWorkload& stepWork)
+{
+ setPbc(pbcType, box, canMoleculeSpanPbc); // Refresh the stored PBC data before launching.
+ launchKernel(stepWork); // Launch with the PBC data set on the line above.
+}
+
void GpuBonded::launchEnergyTransfer()
{
impl_->launchEnergyTransfer();
//! Periodic boundary data
PbcAiuc pbcAiuc;
//! Scale factor for the electrostatic potential
- float scaleFactor;
+ float electrostaticsScaleFactor;
//! The bonded types on GPU
int fTypesOnGpu[numFTypesOnGpu];
//! The number of interaction atom (iatom) elements for every function type
setPbcAiuc(0, boxDummy, &pbcAiuc);
- scaleFactor = 1.0;
- d_forceParams = nullptr;
- d_xq = nullptr;
- d_f = nullptr;
- d_fShift = nullptr;
- d_vTot = nullptr;
+ electrostaticsScaleFactor = 1.0;
+ d_forceParams = nullptr;
+ d_xq = nullptr;
+ d_f = nullptr;
+ d_fShift = nullptr;
+ d_vTot = nullptr;
}
};
public:
//! Constructor
Impl(const gmx_ffparams_t& ffparams,
+ const float electrostaticsScaleFactor,
const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
gmx_wallcycle* wcycle);
void* xqDevice,
DeviceBuffer<RVec> forceDevice,
DeviceBuffer<RVec> fshiftDevice);
+ /*! \brief
+ * Update PBC data.
+ *
+ * Converts the PBC type and box into the PbcAiuc format and stores the latter.
+ *
+ * \param[in] pbcType The type of the periodic boundary.
+ * \param[in] box The periodic boundary box matrix.
+ * \param[in] canMoleculeSpanPbc Whether one molecule can have atoms in different PBC cells.
+ */
+ void setPbc(PbcType pbcType, const matrix box, bool canMoleculeSpanPbc);
/*! \brief Launches bonded kernel on a GPU */
template<bool calcVir, bool calcEner>
- void launchKernel(const t_forcerec* fr, const matrix box);
+ void launchKernel();
/*! \brief Returns whether there are bonded interactions
* assigned to the GPU */
bool haveInteractions() const;
//! Parameters and pointers, passed to the CUDA kernel
BondedCudaKernelParameters kernelParams_;
+ //! GPU kernel launch configuration
+ KernelLaunchConfig kernelLaunchConfig_;
+
//! \brief Pointer to wallcycle structure.
gmx_wallcycle* wcycle_;
};
#include "gromacs/listed_forces/gpubonded.h"
#include "gromacs/math/units.h"
#include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/simulation_workload.h"
-#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
#include "gromacs/utility/gmxassert.h"
# include <limits>
#endif
-// CUDA threads per block
-#define TPB_BONDED 256
-
/*-------------------------------- CUDA kernels-------------------------------- */
/*------------------------------------------------------------------------------*/
kernelParams.d_f, sm_fShiftLoc, kernelParams.pbcAiuc);
break;
case F_LJ14:
- pairs_gpu<calcVir, calcEner>(fTypeTid, numBonds, iatoms, kernelParams.d_forceParams,
- kernelParams.d_xq, kernelParams.d_f, sm_fShiftLoc,
- kernelParams.pbcAiuc, kernelParams.scaleFactor,
- &vtotVdw_loc, &vtotElec_loc);
+ pairs_gpu<calcVir, calcEner>(
+ fTypeTid, numBonds, iatoms, kernelParams.d_forceParams,
+ kernelParams.d_xq, kernelParams.d_f, sm_fShiftLoc, kernelParams.pbcAiuc,
+ kernelParams.electrostaticsScaleFactor, &vtotVdw_loc, &vtotElec_loc);
break;
}
break;
template<bool calcVir, bool calcEner>
-void GpuBonded::Impl::launchKernel(const t_forcerec* fr, const matrix box)
+void GpuBonded::Impl::launchKernel()
{
GMX_ASSERT(haveInteractions_,
"Cannot launch bonded GPU kernels unless bonded GPU work was scheduled");
- static_assert(TPB_BONDED >= SHIFTS,
- "TPB_BONDED must be >= SHIFTS for the virial kernel (calcVir=true)");
-
- PbcAiuc pbcAiuc;
- setPbcAiuc(fr->bMolPBC ? numPbcDimensions(fr->pbcType) : 0, box, &pbcAiuc);
int fTypeRangeEnd = kernelParams_.fTypeRangeEnd[numFTypesOnGpu - 1];
return;
}
- KernelLaunchConfig config;
- config.blockSize[0] = TPB_BONDED;
- config.blockSize[1] = 1;
- config.blockSize[2] = 1;
- config.gridSize[0] = (fTypeRangeEnd + TPB_BONDED) / TPB_BONDED;
- config.gridSize[1] = 1;
- config.gridSize[2] = 1;
-
- auto kernelPtr = exec_kernel_gpu<calcVir, calcEner>;
- kernelParams_.scaleFactor = fr->ic->epsfac * fr->fudgeQQ;
- kernelParams_.pbcAiuc = pbcAiuc;
+ auto kernelPtr = exec_kernel_gpu<calcVir, calcEner>;
- const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, config, &kernelParams_);
+ const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, kernelLaunchConfig_, &kernelParams_);
- launchGpuKernel(kernelPtr, config, deviceStream_, nullptr, "exec_kernel_gpu<calcVir, calcEner>",
- kernelArgs);
+ launchGpuKernel(kernelPtr, kernelLaunchConfig_, deviceStream_, nullptr,
+ "exec_kernel_gpu<calcVir, calcEner>", kernelArgs);
}
-void GpuBonded::launchKernel(const t_forcerec* fr, const gmx::StepWorkload& stepWork, const matrix box)
+void GpuBonded::launchKernel(const gmx::StepWorkload& stepWork)
{
if (stepWork.computeEnergy)
{
// When we need the energy, we also need the virial
- impl_->launchKernel<true, true>(fr, box);
+ impl_->launchKernel<true, true>(); // Template arguments are <calcVir, calcEner>: virial and energy.
}
else if (stepWork.computeVirial)
{
- impl_->launchKernel<true, false>(fr, box);
+ impl_->launchKernel<true, false>(); // Virial only.
}
else
{
- impl_->launchKernel<false, false>(fr, box);
+ impl_->launchKernel<false, false>(); // Neither virial nor energy.
}
}
if (domainWork.haveGpuBondedWork && !havePPDomainDecomposition(cr))
{
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
- fr->gpuBonded->launchKernel(fr, stepWork, box);
+ fr->gpuBonded->setPbcAndlaunchKernel(fr->pbcType, box, fr->bMolPBC, stepWork);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
}
if (domainWork.haveGpuBondedWork)
{
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
- fr->gpuBonded->launchKernel(fr, stepWork, box);
+ fr->gpuBonded->setPbcAndlaunchKernel(fr->pbcType, box, fr->bMolPBC, stepWork);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
}
"GPU device stream manager should be valid in order to use GPU "
"version of bonded forces.");
gpuBonded = std::make_unique<GpuBonded>(
- mtop.ffparams, deviceStreamManager->context(),
+ mtop.ffparams, fr->ic->epsfac * fr->fudgeQQ, deviceStreamManager->context(),
deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)), wcycle);
fr->gpuBonded = gpuBonded.get();
}