namespace Nbnxm
{
-inline void issueClFlushInStream(const DeviceStream& deviceStream)
+static inline void issueClFlushInStream(const DeviceStream& deviceStream)
{
#if GMX_GPU_OPENCL
/* Based on the v1.2 section 5.13 of the OpenCL spec, a flush is needed
#endif
}
-void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
- NBParamGpu* nbp,
- const DeviceContext& deviceContext)
+static inline void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
+ NBParamGpu* nbp,
+ const DeviceContext& deviceContext)
{
if (nbp->coulomb_tab)
{
- destroyParamLookupTable(&nbp->coulomb_tab, nbp->coulomb_tab_texobj);
+ destroyParamLookupTable(&nbp->coulomb_tab, &nbp->coulomb_tab_texobj);
}
nbp->coulomb_tab_scale = tables.scale;
&nbp->coulomb_tab, &nbp->coulomb_tab_texobj, tables.tableF.data(), tables.tableF.size(), deviceContext);
}
-enum ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic,
- const DeviceInformation gmx_unused& deviceInfo)
+static inline ElecType nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic,
+ const DeviceInformation gmx_unused& deviceInfo)
{
bool bTwinCut = (ic.rcoulomb != ic.rvdw);
}
}
-void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t& ic, const PairlistParams& listParams)
+static inline void set_cutoff_parameters(NBParamGpu* nbp,
+ const interaction_const_t& ic,
+ const PairlistParams& listParams)
{
nbp->ewald_beta = ic.ewaldcoeff_q;
nbp->sh_ewald = ic.sh_ewald;
nbp->vdw_switch = ic.vdw_switch;
}
-void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t& ic)
-{
- if (!nbv || !nbv->useGpu())
- {
- return;
- }
- NbnxmGpu* nb = nbv->gpu_nbv;
- NBParamGpu* nbp = nb->nbparam;
-
- set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
-
- nbp->elecType = nbnxn_gpu_pick_ewald_kernel_type(ic, nb->deviceContext_->deviceInfo());
-
- GMX_RELEASE_ASSERT(ic.coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
- init_ewald_coulomb_force_table(*ic.coulombEwaldTables, nbp, *nb->deviceContext_);
-}
-
-void init_plist(gpu_plist* pl)
+static inline void init_plist(gpu_plist* pl)
{
/* initialize to nullptr pointers to data that is not allocated here and will
need reallocation in nbnxn_gpu_init_pairlist */
pl->rollingPruningPart = 0;
}
-void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
+static inline void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
{
t->nb_h2d_t = 0.0;
t->nb_d2h_t = 0.0;
}
/*! \brief Initialize \p atomdata first time; it only gets filled at pair-search. */
-static void initAtomdataFirst(NBAtomData* atomdata,
- int numTypes,
- const DeviceContext& deviceContext,
- const DeviceStream& localStream)
+static inline void initAtomdataFirst(NBAtomDataGpu* atomdata,
+ int numTypes,
+ const DeviceContext& deviceContext,
+ const DeviceStream& localStream)
{
atomdata->numTypes = numTypes;
- allocateDeviceBuffer(&atomdata->shiftVec, SHIFTS, deviceContext);
+ allocateDeviceBuffer(&atomdata->shiftVec, gmx::c_numShiftVectors, deviceContext);
atomdata->shiftVecUploaded = false;
- allocateDeviceBuffer(&atomdata->fShift, SHIFTS, deviceContext);
+ allocateDeviceBuffer(&atomdata->fShift, gmx::c_numShiftVectors, deviceContext);
allocateDeviceBuffer(&atomdata->eLJ, 1, deviceContext);
allocateDeviceBuffer(&atomdata->eElec, 1, deviceContext);
- clearDeviceBufferAsync(&atomdata->fShift, 0, SHIFTS, localStream);
+ clearDeviceBufferAsync(&atomdata->fShift, 0, gmx::c_numShiftVectors, localStream);
clearDeviceBufferAsync(&atomdata->eElec, 0, 1, localStream);
clearDeviceBufferAsync(&atomdata->eLJ, 0, 1, localStream);
atomdata->numAtomsAlloc = -1;
}
+static inline VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t& ic, // Picks the VdwType kernel flavor; reads ic.vdwtype, ic.vdw_modifier and ic.ljpme_comb_rule.
+ LJCombinationRule ljCombinationRule) // Per the assert messages below, this is the short-range combination rule.
+{ // Returns the selected VdwType; unsupported inputs end in GMX_THROW(gmx::InconsistentInputError).
+ if (ic.vdwtype == VanDerWaalsType::Cut) // Cut-off VdW: the flavor depends on the modifier and, for None/PotShift, on the combination rule.
+ {
+ switch (ic.vdw_modifier)
+ {
+ case InteractionModifiers::None: // None and PotShift intentionally share the inner switch.
+ case InteractionModifiers::PotShift:
+ switch (ljCombinationRule)
+ {
+ case LJCombinationRule::None: return VdwType::Cut;
+ case LJCombinationRule::Geometric: return VdwType::CutCombGeom;
+ case LJCombinationRule::LorentzBerthelot: return VdwType::CutCombLB;
+ default:
+ GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
+ "The requested LJ combination rule %s is not implemented in "
+ "the GPU accelerated kernels!",
+ enumValueToString(ljCombinationRule))));
+ } // No break needed: each inner case returns and the default is expected not to return from GMX_THROW.
+ case InteractionModifiers::ForceSwitch: return VdwType::FSwitch; // The combination rule is not consulted for the switched modifiers.
+ case InteractionModifiers::PotSwitch: return VdwType::PSwitch;
+ default:
+ GMX_THROW(gmx::InconsistentInputError(
+ gmx::formatString("The requested VdW interaction modifier %s is not "
+ "implemented in the GPU accelerated kernels!",
+ enumValueToString(ic.vdw_modifier))));
+ }
+ }
+ else if (ic.vdwtype == VanDerWaalsType::Pme) // PME VdW: the flavor is chosen from ic.ljpme_comb_rule alone.
+ {
+ if (ic.ljpme_comb_rule == LongRangeVdW::Geom)
+ {
+ GMX_RELEASE_ASSERT( // Consistency check only; it does not influence which flavor is returned.
+ ljCombinationRule == LJCombinationRule::Geometric,
+ "Combination rules for long- and short-range interactions should match.");
+ return VdwType::EwaldGeom;
+ }
+ else // Every ljpme_comb_rule other than Geom is handled as Lorentz-Berthelot.
+ {
+ GMX_RELEASE_ASSERT(
+ ljCombinationRule == LJCombinationRule::LorentzBerthelot,
+ "Combination rules for long- and short-range interactions should match.");
+ return VdwType::EwaldLB;
+ }
+ }
+ else // Any VdW type other than Cut or Pme is rejected.
+ {
+ GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
+ "The requested VdW type %s is not implemented in the GPU accelerated kernels!",
+ enumValueToString(ic.vdwtype))));
+ }
+}
+
+static inline ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t& ic, // Picks the ElecType kernel flavor; only ic.eeltype is inspected here, ic is also forwarded to the Ewald picker.
+ const DeviceInformation& deviceInfo) // Not read here; only forwarded to nbnxn_gpu_pick_ewald_kernel_type().
+{ // Returns ElecType::Cut, ElecType::RF, or whatever the Ewald picker returns; otherwise throws via GMX_THROW.
+ if (ic.eeltype == CoulombInteractionType::Cut)
+ {
+ return ElecType::Cut;
+ }
+ else if (EEL_RF(ic.eeltype)) // EEL_RF is defined outside this chunk; presumably it matches the reaction-field types (TODO confirm).
+ {
+ return ElecType::RF;
+ }
+ else if ((EEL_PME(ic.eeltype) || ic.eeltype == CoulombInteractionType::Ewald)) // Types accepted by EEL_PME and plain Ewald share the Ewald kernel selection.
+ {
+ return nbnxn_gpu_pick_ewald_kernel_type(ic, deviceInfo);
+ }
+ else
+ {
+ /* Shouldn't happen, as this is checked when choosing Verlet-scheme */
+ GMX_THROW(gmx::InconsistentInputError(
+ gmx::formatString("The requested electrostatics type %s is not implemented in "
+ "the GPU accelerated kernels!",
+ enumValueToString(ic.eeltype))));
+ }
+}
+
/*! \brief Initialize the nonbonded parameter data structure. */
-static void initNbparam(NBParamGpu* nbp,
- const interaction_const_t& ic,
- const PairlistParams& listParams,
- const nbnxn_atomdata_t::Params& nbatParams,
- const DeviceContext& deviceContext)
+static inline void initNbparam(NBParamGpu* nbp,
+ const interaction_const_t& ic,
+ const PairlistParams& listParams,
+ const nbnxn_atomdata_t::Params& nbatParams,
+ const DeviceContext& deviceContext)
{
const int numTypes = nbatParams.numTypes;
{
auto* nb = new NbnxmGpu();
nb->deviceContext_ = &deviceStreamManager.context();
- nb->atdat = new NBAtomData;
+ nb->atdat = new NBAtomDataGpu;
nb->nbparam = new NBParamGpu;
nb->plist[InteractionLocality::Local] = new Nbnxm::gpu_plist;
if (bLocalAndNonlocal)
nb->timers = new Nbnxm::GpuTimers();
snew(nb->timings, 1);
- /* WARNING: CUDA timings are incorrect with multiple streams.
- * This is the main reason why they are disabled by default.
- * Can be enabled by setting GMX_ENABLE_GPU_TIMING environment variable.
- * TODO: Consider turning on by default when we can detect nr of streams.
- *
- * OpenCL timing is enabled by default and can be disabled by
- * GMX_DISABLE_GPU_TIMING environment variable.
- *
- * Timing is disabled in SYCL.
- */
- nb->bDoTime = (GMX_GPU_CUDA && (getenv("GMX_ENABLE_GPU_TIMING") != nullptr))
- || (GMX_GPU_OPENCL && (getenv("GMX_DISABLE_GPU_TIMING") == nullptr));
+ nb->bDoTime = decideGpuTimingsUsage();
if (nb->bDoTime)
{
/* init nbst */
pmalloc(reinterpret_cast<void**>(&nb->nbst.eLJ), sizeof(*nb->nbst.eLJ));
pmalloc(reinterpret_cast<void**>(&nb->nbst.eElec), sizeof(*nb->nbst.eElec));
- pmalloc(reinterpret_cast<void**>(&nb->nbst.fShift), SHIFTS * sizeof(*nb->nbst.fShift));
+ pmalloc(reinterpret_cast<void**>(&nb->nbst.fShift), gmx::c_numShiftVectors * sizeof(*nb->nbst.fShift));
init_plist(nb->plist[InteractionLocality::Local]);
return nb;
}
+void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t& ic) // Refreshes the GPU cut-off parameters, the Ewald kernel type and the Coulomb force table from ic.
+{
+ if (!nbv || !nbv->useGpu()) // Silently does nothing without a nonbonded object that uses the GPU.
+ {
+ return;
+ }
+ NbnxmGpu* nb = nbv->gpu_nbv;
+ NBParamGpu* nbp = nb->nbparam;
+
+ set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
+
+ nbp->elecType = nbnxn_gpu_pick_ewald_kernel_type(ic, nb->deviceContext_->deviceInfo()); // Uses the Ewald-specific picker directly; no Cut/RF path is considered here.
+
+ GMX_RELEASE_ASSERT(ic.coulombEwaldTables, "Need valid Coulomb Ewald correction tables"); // The tables are dereferenced on the next line.
+ init_ewald_coulomb_force_table(*ic.coulombEwaldTables, nbp, *nb->deviceContext_); // The helper destroys an existing nbp->coulomb_tab before setting up the new one.
+}
+
void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
{
- NBAtomData* adat = nb->atdat;
+ NBAtomDataGpu* adat = nb->atdat;
const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
/* only if we have a dynamic box */
copyToDeviceBuffer(&adat->shiftVec,
gmx::asGenericFloat3Pointer(nbatom->shift_vec),
0,
- SHIFTS,
+ gmx::c_numShiftVectors,
localStream,
GpuApiCallBehavior::Async,
nullptr);
{
bool bDoTime = nb->bDoTime;
Nbnxm::GpuTimers* timers = bDoTime ? nb->timers : nullptr;
- NBAtomData* atdat = nb->atdat;
+ NBAtomDataGpu* atdat = nb->atdat;
const DeviceContext& deviceContext = *nb->deviceContext_;
const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
{
- NBAtomData* adat = nb->atdat;
+ NBAtomDataGpu* adat = nb->atdat;
const DeviceStream& localStream = *nb->deviceStreams[InteractionLocality::Local];
// Clear forces
clearDeviceBufferAsync(&adat->f, 0, nb->atdat->numAtoms, localStream);
// Clear shift force array and energies if the outputs were used in the current step
if (computeVirial)
{
- clearDeviceBufferAsync(&adat->fShift, 0, SHIFTS, localStream);
+ clearDeviceBufferAsync(&adat->fShift, 0, gmx::c_numShiftVectors, localStream);
clearDeviceBufferAsync(&adat->eLJ, 0, 1, localStream);
clearDeviceBufferAsync(&adat->eElec, 0, 1, localStream);
}
|| (nb->nbparam->elecType == ElecType::EwaldAnaTwin));
}
-enum ElecType nbnxmGpuPickElectrostaticsKernelType(const interaction_const_t& ic,
- const DeviceInformation& deviceInfo)
-{
- if (ic.eeltype == CoulombInteractionType::Cut)
- {
- return ElecType::Cut;
- }
- else if (EEL_RF(ic.eeltype))
- {
- return ElecType::RF;
- }
- else if ((EEL_PME(ic.eeltype) || ic.eeltype == CoulombInteractionType::Ewald))
- {
- return nbnxn_gpu_pick_ewald_kernel_type(ic, deviceInfo);
- }
- else
- {
- /* Shouldn't happen, as this is checked when choosing Verlet-scheme */
- GMX_THROW(gmx::InconsistentInputError(
- gmx::formatString("The requested electrostatics type %s is not implemented in "
- "the GPU accelerated kernels!",
- enumValueToString(ic.eeltype))));
- }
-}
-
-
-enum VdwType nbnxmGpuPickVdwKernelType(const interaction_const_t& ic, LJCombinationRule ljCombinationRule)
-{
- if (ic.vdwtype == VanDerWaalsType::Cut)
- {
- switch (ic.vdw_modifier)
- {
- case InteractionModifiers::None:
- case InteractionModifiers::PotShift:
- switch (ljCombinationRule)
- {
- case LJCombinationRule::None: return VdwType::Cut;
- case LJCombinationRule::Geometric: return VdwType::CutCombGeom;
- case LJCombinationRule::LorentzBerthelot: return VdwType::CutCombLB;
- default:
- GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
- "The requested LJ combination rule %s is not implemented in "
- "the GPU accelerated kernels!",
- enumValueToString(ljCombinationRule))));
- }
- case InteractionModifiers::ForceSwitch: return VdwType::FSwitch;
- case InteractionModifiers::PotSwitch: return VdwType::PSwitch;
- default:
- GMX_THROW(gmx::InconsistentInputError(
- gmx::formatString("The requested VdW interaction modifier %s is not "
- "implemented in the GPU accelerated kernels!",
- enumValueToString(ic.vdw_modifier))));
- }
- }
- else if (ic.vdwtype == VanDerWaalsType::Pme)
- {
- if (ic.ljpme_comb_rule == LongRangeVdW::Geom)
- {
- assert(ljCombinationRule == LJCombinationRule::Geometric);
- return VdwType::EwaldGeom;
- }
- else
- {
- assert(ljCombinationRule == LJCombinationRule::LorentzBerthelot);
- return VdwType::EwaldLB;
- }
- }
- else
- {
- GMX_THROW(gmx::InconsistentInputError(gmx::formatString(
- "The requested VdW type %s is not implemented in the GPU accelerated kernels!",
- enumValueToString(ic.vdwtype))));
- }
-}
-
-void setupGpuShortRangeWork(NbnxmGpu* nb, const gmx::GpuBonded* gpuBonded, const gmx::InteractionLocality iLocality)
+void setupGpuShortRangeWork(NbnxmGpu* nb,
+ const gmx::ListedForcesGpu* listedForcesGpu,
+ const gmx::InteractionLocality iLocality)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
// interaction locality contains entries or if there is any
// bonded work (as this is not split into local/nonlocal).
nb->haveWork[iLocality] = ((nb->plist[iLocality]->nsci != 0)
- || (gpuBonded != nullptr && gpuBonded->haveInteractions()));
+ || (listedForcesGpu != nullptr && listedForcesGpu->haveInteractions()));
}
bool haveGpuShortRangeWork(const NbnxmGpu* nb, const gmx::InteractionLocality interactionLocality)
"beginning of the copy back function.");
/* extract the data */
- NBAtomData* adat = nb->atdat;
+ NBAtomDataGpu* adat = nb->atdat;
Nbnxm::GpuTimers* timers = nb->timers;
bool bDoTime = nb->bDoTime;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
copyFromDeviceBuffer(nb->nbst.fShift,
&adat->fShift,
0,
- SHIFTS,
+ gmx::c_numShiftVectors,
deviceStream,
GpuApiCallBehavior::Async,
bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
const InteractionLocality iloc = atomToInteractionLocality(atomLocality);
- NBAtomData* adat = nb->atdat;
+ NBAtomDataGpu* adat = nb->atdat;
gpu_plist* plist = nb->plist[iloc];
Nbnxm::GpuTimers* timers = nb->timers;
const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
nbnxnInsertNonlocalGpuDependency(gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
}
+//! This function is documented in the header file
+void gpu_free(NbnxmGpu* nb)
+{
+ if (nb == nullptr) // Freeing a null object is a no-op.
+ {
+ return;
+ }
+
+ gpu_free_platform_specific(nb); // Defined outside this chunk; called before any member of nb is released.
+
+ delete nb->timers;
+ sfree(nb->timings);
+
+ NBAtomDataGpu* atdat = nb->atdat; // Cached so both structs can be deleted at the very end.
+ NBParamGpu* nbparam = nb->nbparam;
+
+ /* Free atdat */
+ freeDeviceBuffer(&(nb->atdat->xq));
+ freeDeviceBuffer(&(nb->atdat->f));
+ freeDeviceBuffer(&(nb->atdat->eLJ));
+ freeDeviceBuffer(&(nb->atdat->eElec));
+ freeDeviceBuffer(&(nb->atdat->fShift));
+ freeDeviceBuffer(&(nb->atdat->shiftVec));
+ if (useLjCombRule(nb->nbparam->vdwType)) // Exactly one of ljComb / atomTypes is freed; presumably only that one was allocated (TODO confirm against the init code).
+ {
+ freeDeviceBuffer(&atdat->ljComb);
+ }
+ else
+ {
+ freeDeviceBuffer(&atdat->atomTypes);
+ }
+
+ /* Free nbparam */
+ if (nbparam->elecType == ElecType::EwaldTab || nbparam->elecType == ElecType::EwaldTabTwin) // The Coulomb table is destroyed only for the tabulated Ewald flavors.
+ {
+ destroyParamLookupTable(&nbparam->coulomb_tab, &nbparam->coulomb_tab_texobj);
+ }
+
+ if (!useLjCombRule(nb->nbparam->vdwType)) // nbfp is destroyed only when no LJ combination rule is in use.
+ {
+ destroyParamLookupTable(&nbparam->nbfp, &nbparam->nbfp_texobj);
+ }
+
+ if (nbparam->vdwType == VdwType::EwaldGeom || nbparam->vdwType == VdwType::EwaldLB) // nbfp_comb is destroyed only for the Ewald VdW flavors.
+ {
+ destroyParamLookupTable(&nbparam->nbfp_comb, &nbparam->nbfp_comb_texobj);
+ }
+
+ /* Free plist */
+ auto* plist = nb->plist[InteractionLocality::Local]; // The local pairlist is released unconditionally.
+ freeDeviceBuffer(&plist->sci);
+ freeDeviceBuffer(&plist->cj4);
+ freeDeviceBuffer(&plist->imask);
+ freeDeviceBuffer(&plist->excl);
+ delete plist;
+ if (nb->bUseTwoStreams) // The non-local pairlist is released only in two-stream mode; presumably it exists only then (TODO confirm).
+ {
+ auto* plist_nl = nb->plist[InteractionLocality::NonLocal];
+ freeDeviceBuffer(&plist_nl->sci);
+ freeDeviceBuffer(&plist_nl->cj4);
+ freeDeviceBuffer(&plist_nl->imask);
+ freeDeviceBuffer(&plist_nl->excl);
+ delete plist_nl;
+ }
+
+ /* Free nbst */
+ pfree(nb->nbst.eLJ); // Releases the pmalloc'd staging buffers; each pointer is reset to nullptr afterwards.
+ nb->nbst.eLJ = nullptr;
+
+ pfree(nb->nbst.eElec);
+ nb->nbst.eElec = nullptr;
+
+ pfree(nb->nbst.fShift);
+ nb->nbst.fShift = nullptr;
+
+ delete atdat; // The structs themselves go last, after every buffer reached through them has been released.
+ delete nbparam;
+ delete nb;
+
+ if (debug) // debug is declared outside this chunk; the message is written only when it is non-null.
+ {
+ fprintf(debug, "Cleaned up NBNXM GPU data structures.\n");
+ }
+}
+
+DeviceBuffer<gmx::RVec> gpu_get_f(NbnxmGpu* nb) // Returns the force buffer stored in nb->atdat->f.
+{
+ GMX_ASSERT(nb != nullptr, "nb pointer must be valid"); // Only nb is checked; nb->atdat is dereferenced without a check.
+
+ return nb->atdat->f;
+}
+
} // namespace Nbnxm