From f3cc5be4764c7793e06675b576377245ee777218 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Fri, 30 Aug 2019 16:01:21 +0200 Subject: [PATCH] Pass gmx::ForceFlags to CPU nbnxm dispatch code Also added one last missing flag, ForceFlags.computeDhdl. Change-Id: Ibcfea7f3975685f2219c5a5e3c8e45c716be1789 --- src/gromacs/mdlib/ppforceworkload.h | 2 ++ src/gromacs/mdlib/sim_util.cpp | 22 ++++++------ src/gromacs/nbnxm/kerneldispatch.cpp | 36 +++++++++---------- .../kernels_reference/kernel_gpu_ref.cpp | 15 ++++---- .../nbnxm/kernels_reference/kernel_gpu_ref.h | 7 +++- src/gromacs/nbnxm/nbnxm.h | 3 +- 6 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/gromacs/mdlib/ppforceworkload.h b/src/gromacs/mdlib/ppforceworkload.h index 43769c66bb..0ff7d990e1 100644 --- a/src/gromacs/mdlib/ppforceworkload.h +++ b/src/gromacs/mdlib/ppforceworkload.h @@ -72,6 +72,8 @@ class ForceFlags bool computeNonbondedForces = false; //! Whether listed forces need to be computed this step bool computeListedForces = false; + //! Whether DHDL needs to be computed this step + bool computeDhdl = false; }; /*! 
\libinternal diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 61ca2b0758..2d257e20fb 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -314,7 +314,6 @@ static void post_process_forces(const t_commrec *cr, static void do_nb_verlet(t_forcerec *fr, const interaction_const_t *ic, gmx_enerdata_t *enerd, - int legacyForceFlags, const gmx::ForceFlags &forceFlags, const Nbnxm::InteractionLocality ilocality, const int clearF, @@ -322,7 +321,7 @@ static void do_nb_verlet(t_forcerec *fr, t_nrnb *nrnb, gmx_wallcycle_t wcycle) { - if (!(legacyForceFlags & GMX_FORCE_NONBONDED)) + if (!forceFlags.computeNonbondedForces) { /* skip non-bonded calculation */ return; @@ -352,7 +351,7 @@ static void do_nb_verlet(t_forcerec *fr, } } - nbv->dispatchNonbondedKernel(ilocality, *ic, legacyForceFlags, forceFlags, clearF, *fr, enerd, nrnb); + nbv->dispatchNonbondedKernel(ilocality, *ic, forceFlags, clearF, *fr, enerd, nrnb); } static inline void clear_rvecs_omp(int n, rvec v[]) @@ -803,6 +802,7 @@ setupForceFlags(gmx::ForceFlags *flags, flags->computeForces = ((legacyFlags & GMX_FORCE_FORCES) != 0); flags->computeListedForces = ((legacyFlags & GMX_FORCE_LISTED) != 0); flags->computeNonbondedForces = ((legacyFlags & GMX_FORCE_NONBONDED) != 0) && isNonbondedOn; + flags->computeDhdl = ((legacyFlags & GMX_FORCE_DHDL) != 0); } @@ -1150,7 +1150,7 @@ void do_force(FILE *fplog, /* launch local nonbonded work on GPU */ wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFNo, + do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFNo, step, nrnb, wcycle); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); wallcycle_stop(wcycle, ewcLAUNCH_GPU); @@ -1212,7 +1212,7 @@ void do_force(FILE *fplog, /* launch non-local nonbonded tasks on GPU */ wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - 
do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, + do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, step, nrnb, wcycle); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); @@ -1316,7 +1316,7 @@ void do_force(FILE *fplog, if (!bUseOrEmulGPU) { - do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFYes, + do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFYes, step, nrnb, wcycle); } @@ -1328,14 +1328,14 @@ void do_force(FILE *fplog, nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local, fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms, inputrec->fepvals, lambda.data(), - enerd, flags, nrnb); + enerd, forceFlags, nrnb); if (havePPDomainDecomposition(cr)) { nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal, fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms, inputrec->fepvals, lambda.data(), - enerd, flags, nrnb); + enerd, forceFlags, nrnb); } } @@ -1343,7 +1343,7 @@ void do_force(FILE *fplog, { if (havePPDomainDecomposition(cr)) { - do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, + do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo, step, nrnb, wcycle); } @@ -1413,7 +1413,7 @@ void do_force(FILE *fplog, else { wallcycle_start_nocount(wcycle, ewcFORCE); - do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes, + do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes, step, nrnb, wcycle); wallcycle_stop(wcycle, ewcFORCE); } @@ -1516,7 +1516,7 @@ void do_force(FILE *fplog, // NOTE: emulation kernel is not included in the balancing region, // but emulation mode does not target performance anyway wallcycle_start_nocount(wcycle, 
ewcFORCE); - do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::Local, + do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes, step, nrnb, wcycle); wallcycle_stop(wcycle, ewcFORCE); diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp index 1303a20e48..2d53a92dc7 100644 --- a/src/gromacs/nbnxm/kerneldispatch.cpp +++ b/src/gromacs/nbnxm/kerneldispatch.cpp @@ -42,8 +42,8 @@ #include "gromacs/math/vectypes.h" #include "gromacs/mdlib/enerdata_utils.h" #include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/force_flags.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" +#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdtypes/enerdata.h" #include "gromacs/mdtypes/forceoutput.h" #include "gromacs/mdtypes/inputrec.h" @@ -155,7 +155,7 @@ nbnxn_kernel_cpu(const PairlistSet &pairlistSet, nbnxn_atomdata_t *nbat, const interaction_const_t &ic, rvec *shiftVectors, - int forceFlags, + const gmx::ForceFlags &forceFlags, int clearF, real *vCoulomb, real *vVdw, @@ -266,7 +266,7 @@ nbnxn_kernel_cpu(const PairlistSet &pairlistSet, // TODO: Change to reference const NbnxnPairlistCpu *pairlist = &pairlists[nb]; - if (!(forceFlags & GMX_FORCE_ENERGY)) + if (!forceFlags.computeEnergy) { /* Don't calculate energies */ switch (kernelSetup.kernelType) @@ -396,7 +396,7 @@ nbnxn_kernel_cpu(const PairlistSet &pairlistSet, } wallcycle_sub_stop(wcycle, ewcsNONBONDED_KERNEL); - if (forceFlags & GMX_FORCE_ENERGY) + if (forceFlags.computeEnergy) { reduce_energies_over_lists(nbat, pairlists.ssize(), vVdw, vCoulomb); } @@ -406,7 +406,7 @@ static void accountFlops(t_nrnb *nrnb, const PairlistSet &pairlistSet, const nonbonded_verlet_t &nbv, const interaction_const_t &ic, - const int forceFlags) + const gmx::ForceFlags &forceFlags) { const bool usingGpuKernels = nbv.useGpu(); @@ -425,7 +425,7 @@ static void accountFlops(t_nrnb *nrnb, enr_nbnxn_kernel_ljc = 
eNR_NBNXN_LJ_TAB; } int enr_nbnxn_kernel_lj = eNR_NBNXN_LJ; - if (forceFlags & GMX_FORCE_ENERGY) + if (forceFlags.computeEnergy) { /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */ enr_nbnxn_kernel_ljc += 1; @@ -440,23 +440,22 @@ static void accountFlops(t_nrnb *nrnb, inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF, pairlistSet.natpair_q_); - const bool calcEnergy = ((forceFlags & GMX_FORCE_ENERGY) != 0); if (ic.vdw_modifier == eintmodFORCESWITCH) { /* We add up the switch cost separately */ - inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (calcEnergy ? 1 : 0), + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (forceFlags.computeEnergy ? 1 : 0), pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_); } if (ic.vdw_modifier == eintmodPOTSWITCH) { /* We add up the switch cost separately */ - inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (calcEnergy ? 1 : 0), + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (forceFlags.computeEnergy ? 1 : 0), pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_); } if (ic.vdwtype == evdwPME) { /* We add up the LJ Ewald cost separately */ - inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (calcEnergy ? 1 : 0), + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (forceFlags.computeEnergy ? 1 : 0), pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_); } } @@ -464,7 +463,6 @@ static void accountFlops(t_nrnb *nrnb, void nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality, const interaction_const_t &ic, - int legacyForceFlags, const gmx::ForceFlags &forceFlags, int clearF, const t_forcerec &fr, @@ -483,7 +481,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality nbat.get(), ic, fr.shift_vec, - legacyForceFlags, + forceFlags, clearF, enerd->grpp.ener[egCOULSR].data(), fr.bBHAM ? 
@@ -500,7 +498,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality nbnxn_kernel_gpu_ref(pairlistSet.gpuList(), nbat.get(), &ic, fr.shift_vec, - legacyForceFlags, + forceFlags, clearF, nbat->out[0].f, nbat->out[0].fshift.data(), @@ -515,7 +513,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality } - accountFlops(nrnb, pairlistSet, *this, ic, legacyForceFlags); + accountFlops(nrnb, pairlistSet, *this, ic, forceFlags); } void @@ -527,7 +525,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocali t_lambda *fepvals, real *lambda, gmx_enerdata_t *enerd, - const int forceFlags, + const gmx::ForceFlags &forceFlags, t_nrnb *nrnb) { const auto nbl_fep = pairlistSets().pairlistSet(iLocality).fepLists(); @@ -543,15 +541,15 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocali donb_flags |= GMX_NONBONDED_DO_SR; /* Currently all group scheme kernels always calculate (shift-)forces */ - if (forceFlags & GMX_FORCE_FORCES) + if (forceFlags.computeForces) { donb_flags |= GMX_NONBONDED_DO_FORCE; } - if (forceFlags & GMX_FORCE_VIRIAL) + if (forceFlags.computeVirial) { donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; } - if (forceFlags & GMX_FORCE_ENERGY) + if (forceFlags.computeEnergy) { donb_flags |= GMX_NONBONDED_DO_POTENTIAL; } @@ -594,7 +592,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocali /* If we do foreign lambda and we have soft-core interactions * we have to recalculate the (non-linear) energies contributions. 
*/ - if (fepvals->n_lambda > 0 && (forceFlags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + if (fepvals->n_lambda > 0 && forceFlags.computeDhdl && fepvals->sc_alpha != 0) { real lam_i[efptNR]; kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA; diff --git a/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp b/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp index 91bad8a61a..16a4397112 100644 --- a/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp +++ b/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp @@ -43,7 +43,7 @@ #include "gromacs/math/functions.h" #include "gromacs/math/utilities.h" #include "gromacs/math/vec.h" -#include "gromacs/mdlib/force_flags.h" +#include "gromacs/mdlib/ppforceworkload.h" #include "gromacs/mdtypes/md_enums.h" #include "gromacs/nbnxm/atomdata.h" #include "gromacs/nbnxm/nbnxm.h" @@ -59,14 +59,13 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, const nbnxn_atomdata_t *nbat, const interaction_const_t *iconst, rvec *shift_vec, - int force_flags, + const gmx::ForceFlags &forceFlags, int clearF, gmx::ArrayRef f, real * fshift, real * Vc, real * Vvdw) { - gmx_bool bEner; gmx_bool bEwald; const real *Ftab = nullptr; real rcut2, rvdw2, rlist2; @@ -114,8 +113,6 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, } } - bEner = ((force_flags & GMX_FORCE_ENERGY) != 0); - bEwald = EEL_FULL(iconst->eeltype); if (bEwald) { @@ -265,7 +262,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, /* Reaction-field */ krsq = iconst->k_rf*rsq; fscal = qq*(int_bit*rinv - 2*krsq)*rinvsq; - if (bEner) + if (forceFlags.computeEnergy) { vcoul = qq*(int_bit*rinv + krsq - iconst->c_rf); } @@ -281,7 +278,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, fscal = qq*(int_bit*rinvsq - fexcl)*rinv; - if (bEner) + if (forceFlags.computeEnergy) { vcoul = qq*((int_bit - std::erf(iconst->ewaldcoeff_q*r))*rinv - int_bit*iconst->sh_ewald); } @@ -300,7 +297,7 @@ 
nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, Vvdw_rep = c12*rinvsix*rinvsix; fscal += (Vvdw_rep - Vvdw_disp)*rinvsq; - if (bEner) + if (forceFlags.computeEnergy) { vctot += vcoul; @@ -350,7 +347,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, } } - if (bEner) + if (forceFlags.computeEnergy) { ggid = 0; Vc[ggid] = Vc[ggid] + vctot; diff --git a/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h b/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h index 9e744cde07..d26747e0c7 100644 --- a/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h +++ b/src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h @@ -44,13 +44,18 @@ struct NbnxnPairlistGpu; struct nbnxn_atomdata_t; +namespace gmx +{ +class ForceFlags; +} + /* Reference (slow) kernel for nb n vs n GPU type pair lists */ void nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu *nbl, const nbnxn_atomdata_t *nbat, const interaction_const_t *iconst, rvec *shift_vec, - int force_flags, + const gmx::ForceFlags &forceFlags, int clearF, gmx::ArrayRef f, real * fshift, diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h index 5a6ff9dea4..1a2ce8cc0c 100644 --- a/src/gromacs/nbnxm/nbnxm.h +++ b/src/gromacs/nbnxm/nbnxm.h @@ -286,7 +286,6 @@ struct nonbonded_verlet_t //! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU void dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality, const interaction_const_t &ic, - int legacyForceFlags, const gmx::ForceFlags &forceFlags, int clearF, const t_forcerec &fr, @@ -302,7 +301,7 @@ struct nonbonded_verlet_t t_lambda *fepvals, real *lambda, gmx_enerdata_t *enerd, - int forceFlags, + const gmx::ForceFlags &forceFlags, t_nrnb *nrnb); /*! \brief Add the forces stored in nbat to f, zeros the forces in nbat -- 2.22.0