Pass gmx::ForceFlags to CPU nbnxm dispatch code
authorSzilárd Páll <pall.szilard@gmail.com>
Fri, 30 Aug 2019 14:01:21 +0000 (16:01 +0200)
committerMark Abraham <mark.j.abraham@gmail.com>
Mon, 9 Sep 2019 21:06:10 +0000 (23:06 +0200)
Also added one last missing flag, ForceFlags.computeDhdl.

Change-Id: Ibcfea7f3975685f2219c5a5e3c8e45c716be1789

src/gromacs/mdlib/ppforceworkload.h
src/gromacs/mdlib/sim_util.cpp
src/gromacs/nbnxm/kerneldispatch.cpp
src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.cpp
src/gromacs/nbnxm/kernels_reference/kernel_gpu_ref.h
src/gromacs/nbnxm/nbnxm.h

index 43769c66bb6505da8faa68543a0126723ce6fd06..0ff7d990e166d9ffa3fee54e5f6713f332af3b43 100644 (file)
@@ -72,6 +72,8 @@ class ForceFlags
         bool computeNonbondedForces = false;
         //! Whether listed forces need to be computed this step
         bool computeListedForces = false;
+        //! Whether this step DHDL needs to be computed
+        bool computeDhdl = false;
 };
 
 /*! \libinternal
index 61ca2b0758881e2574edd22a6ae8231ecf5cb709..2d257e20fb2d6f50791a6008656450b1475a35f7 100644 (file)
@@ -314,7 +314,6 @@ static void post_process_forces(const t_commrec       *cr,
 static void do_nb_verlet(t_forcerec                       *fr,
                          const interaction_const_t        *ic,
                          gmx_enerdata_t                   *enerd,
-                         int                               legacyForceFlags,
                          const gmx::ForceFlags            &forceFlags,
                          const Nbnxm::InteractionLocality  ilocality,
                          const int                         clearF,
@@ -322,7 +321,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
                          t_nrnb                           *nrnb,
                          gmx_wallcycle_t                   wcycle)
 {
-    if (!(legacyForceFlags & GMX_FORCE_NONBONDED))
+    if (!forceFlags.computeNonbondedForces)
     {
         /* skip non-bonded calculation */
         return;
@@ -352,7 +351,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
         }
     }
 
-    nbv->dispatchNonbondedKernel(ilocality, *ic, legacyForceFlags, forceFlags, clearF, *fr, enerd, nrnb);
+    nbv->dispatchNonbondedKernel(ilocality, *ic, forceFlags, clearF, *fr, enerd, nrnb);
 }
 
 static inline void clear_rvecs_omp(int n, rvec v[])
@@ -803,6 +802,7 @@ setupForceFlags(gmx::ForceFlags *flags,
     flags->computeForces          = ((legacyFlags & GMX_FORCE_FORCES) != 0);
     flags->computeListedForces    = ((legacyFlags & GMX_FORCE_LISTED) != 0);
     flags->computeNonbondedForces = ((legacyFlags & GMX_FORCE_NONBONDED) != 0) && isNonbondedOn;
+    flags->computeDhdl            = ((legacyFlags & GMX_FORCE_DHDL) != 0);
 }
 
 
@@ -1150,7 +1150,7 @@ void do_force(FILE                                     *fplog,
 
         /* launch local nonbonded work on GPU */
         wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFNo,
+        do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFNo,
                      step, nrnb, wcycle);
         wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
         wallcycle_stop(wcycle, ewcLAUNCH_GPU);
@@ -1212,7 +1212,7 @@ void do_force(FILE                                     *fplog,
 
             /* launch non-local nonbonded tasks on GPU */
             wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-            do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
+            do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
                          step, nrnb, wcycle);
             wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
 
@@ -1316,7 +1316,7 @@ void do_force(FILE                                     *fplog,
 
     if (!bUseOrEmulGPU)
     {
-        do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFYes,
+        do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local, enbvClearFYes,
                      step, nrnb, wcycle);
     }
 
@@ -1328,14 +1328,14 @@ void do_force(FILE                                     *fplog,
         nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local,
                                       fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
                                       inputrec->fepvals, lambda.data(),
-                                      enerd, flags, nrnb);
+                                      enerd, forceFlags, nrnb);
 
         if (havePPDomainDecomposition(cr))
         {
             nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal,
                                           fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
                                           inputrec->fepvals, lambda.data(),
-                                          enerd, flags, nrnb);
+                                          enerd, forceFlags, nrnb);
         }
     }
 
@@ -1343,7 +1343,7 @@ void do_force(FILE                                     *fplog,
     {
         if (havePPDomainDecomposition(cr))
         {
-            do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
+            do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFNo,
                          step, nrnb, wcycle);
         }
 
@@ -1413,7 +1413,7 @@ void do_force(FILE                                     *fplog,
             else
             {
                 wallcycle_start_nocount(wcycle, ewcFORCE);
-                do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes,
+                do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::NonLocal, enbvClearFYes,
                              step, nrnb, wcycle);
                 wallcycle_stop(wcycle, ewcFORCE);
             }
@@ -1516,7 +1516,7 @@ void do_force(FILE                                     *fplog,
         // NOTE: emulation kernel is not included in the balancing region,
         // but emulation mode does not target performance anyway
         wallcycle_start_nocount(wcycle, ewcFORCE);
-        do_nb_verlet(fr, ic, enerd, flags, forceFlags, Nbnxm::InteractionLocality::Local,
+        do_nb_verlet(fr, ic, enerd, forceFlags, Nbnxm::InteractionLocality::Local,
                      DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
                      step, nrnb, wcycle);
         wallcycle_stop(wcycle, ewcFORCE);
index 1303a20e483d67467a635795deb1578efab32274..2d53a92dc752691fca58e0ee902d8f577a22c68e 100644 (file)
@@ -42,8 +42,8 @@
 #include "gromacs/math/vectypes.h"
 #include "gromacs/mdlib/enerdata_utils.h"
 #include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/force_flags.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/enerdata.h"
 #include "gromacs/mdtypes/forceoutput.h"
 #include "gromacs/mdtypes/inputrec.h"
@@ -155,7 +155,7 @@ nbnxn_kernel_cpu(const PairlistSet              &pairlistSet,
                  nbnxn_atomdata_t               *nbat,
                  const interaction_const_t      &ic,
                  rvec                           *shiftVectors,
-                 int                             forceFlags,
+                 const gmx::ForceFlags          &forceFlags,
                  int                             clearF,
                  real                           *vCoulomb,
                  real                           *vVdw,
@@ -266,7 +266,7 @@ nbnxn_kernel_cpu(const PairlistSet              &pairlistSet,
         // TODO: Change to reference
         const NbnxnPairlistCpu *pairlist = &pairlists[nb];
 
-        if (!(forceFlags & GMX_FORCE_ENERGY))
+        if (!forceFlags.computeEnergy)
         {
             /* Don't calculate energies */
             switch (kernelSetup.kernelType)
@@ -396,7 +396,7 @@ nbnxn_kernel_cpu(const PairlistSet              &pairlistSet,
     }
     wallcycle_sub_stop(wcycle, ewcsNONBONDED_KERNEL);
 
-    if (forceFlags & GMX_FORCE_ENERGY)
+    if (forceFlags.computeEnergy)
     {
         reduce_energies_over_lists(nbat, pairlists.ssize(), vVdw, vCoulomb);
     }
@@ -406,7 +406,7 @@ static void accountFlops(t_nrnb                           *nrnb,
                          const PairlistSet                &pairlistSet,
                          const nonbonded_verlet_t         &nbv,
                          const interaction_const_t        &ic,
-                         const int                         forceFlags)
+                         const gmx::ForceFlags            &forceFlags)
 {
     const bool usingGpuKernels = nbv.useGpu();
 
@@ -425,7 +425,7 @@ static void accountFlops(t_nrnb                           *nrnb,
         enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
     }
     int enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
-    if (forceFlags & GMX_FORCE_ENERGY)
+    if (forceFlags.computeEnergy)
     {
         /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
         enr_nbnxn_kernel_ljc += 1;
@@ -440,23 +440,22 @@ static void accountFlops(t_nrnb                           *nrnb,
     inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
              pairlistSet.natpair_q_);
 
-    const bool calcEnergy = ((forceFlags & GMX_FORCE_ENERGY) != 0);
     if (ic.vdw_modifier == eintmodFORCESWITCH)
     {
         /* We add up the switch cost separately */
-        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (calcEnergy ? 1 : 0),
+        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW + (forceFlags.computeEnergy ? 1 : 0),
                  pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_);
     }
     if (ic.vdw_modifier == eintmodPOTSWITCH)
     {
         /* We add up the switch cost separately */
-        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (calcEnergy ? 1 : 0),
+        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW + (forceFlags.computeEnergy ? 1 : 0),
                  pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_);
     }
     if (ic.vdwtype == evdwPME)
     {
         /* We add up the LJ Ewald cost separately */
-        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (calcEnergy ? 1 : 0),
+        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD + (forceFlags.computeEnergy ? 1 : 0),
                  pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_);
     }
 }
@@ -464,7 +463,6 @@ static void accountFlops(t_nrnb                           *nrnb,
 void
 nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality,
                                             const interaction_const_t &ic,
-                                            int                        legacyForceFlags,
                                             const gmx::ForceFlags     &forceFlags,
                                             int                        clearF,
                                             const t_forcerec          &fr,
@@ -483,7 +481,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
                              nbat.get(),
                              ic,
                              fr.shift_vec,
-                             legacyForceFlags,
+                             forceFlags,
                              clearF,
                              enerd->grpp.ener[egCOULSR].data(),
                              fr.bBHAM ?
@@ -500,7 +498,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
             nbnxn_kernel_gpu_ref(pairlistSet.gpuList(),
                                  nbat.get(), &ic,
                                  fr.shift_vec,
-                                 legacyForceFlags,
+                                 forceFlags,
                                  clearF,
                                  nbat->out[0].f,
                                  nbat->out[0].fshift.data(),
@@ -515,7 +513,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
 
     }
 
-    accountFlops(nrnb, pairlistSet, *this, ic, legacyForceFlags);
+    accountFlops(nrnb, pairlistSet, *this, ic, forceFlags);
 }
 
 void
@@ -527,7 +525,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
                                              t_lambda                   *fepvals,
                                              real                       *lambda,
                                              gmx_enerdata_t             *enerd,
-                                             const int                   forceFlags,
+                                             const gmx::ForceFlags      &forceFlags,
                                              t_nrnb                     *nrnb)
 {
     const auto nbl_fep = pairlistSets().pairlistSet(iLocality).fepLists();
@@ -543,15 +541,15 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
     donb_flags |= GMX_NONBONDED_DO_SR;
 
     /* Currently all group scheme kernels always calculate (shift-)forces */
-    if (forceFlags & GMX_FORCE_FORCES)
+    if (forceFlags.computeForces)
     {
         donb_flags |= GMX_NONBONDED_DO_FORCE;
     }
-    if (forceFlags & GMX_FORCE_VIRIAL)
+    if (forceFlags.computeVirial)
     {
         donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
     }
-    if (forceFlags & GMX_FORCE_ENERGY)
+    if (forceFlags.computeEnergy)
     {
         donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
     }
@@ -594,7 +592,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
     /* If we do foreign lambda and we have soft-core interactions
      * we have to recalculate the (non-linear) energies contributions.
      */
-    if (fepvals->n_lambda > 0 && (forceFlags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
+    if (fepvals->n_lambda > 0 && forceFlags.computeDhdl && fepvals->sc_alpha != 0)
     {
         real lam_i[efptNR];
         kernel_data.flags          = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA;
index 91bad8a61a414ab43e233752b6bdd1acb4f67b5a..16a439711295b359e5276ee0508ad8462e0b13a4 100644 (file)
@@ -43,7 +43,7 @@
 #include "gromacs/math/functions.h"
 #include "gromacs/math/utilities.h"
 #include "gromacs/math/vec.h"
-#include "gromacs/mdlib/force_flags.h"
+#include "gromacs/mdlib/ppforceworkload.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/nbnxm/atomdata.h"
 #include "gromacs/nbnxm/nbnxm.h"
@@ -59,14 +59,13 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
                      const nbnxn_atomdata_t     *nbat,
                      const interaction_const_t  *iconst,
                      rvec                       *shift_vec,
-                     int                         force_flags,
+                     const gmx::ForceFlags      &forceFlags,
                      int                         clearF,
                      gmx::ArrayRef<real>         f,
                      real  *                     fshift,
                      real  *                     Vc,
                      real  *                     Vvdw)
 {
-    gmx_bool            bEner;
     gmx_bool            bEwald;
     const real         *Ftab = nullptr;
     real                rcut2, rvdw2, rlist2;
@@ -114,8 +113,6 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
         }
     }
 
-    bEner = ((force_flags & GMX_FORCE_ENERGY) != 0);
-
     bEwald = EEL_FULL(iconst->eeltype);
     if (bEwald)
     {
@@ -265,7 +262,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
                                     /* Reaction-field */
                                     krsq  = iconst->k_rf*rsq;
                                     fscal = qq*(int_bit*rinv - 2*krsq)*rinvsq;
-                                    if (bEner)
+                                    if (forceFlags.computeEnergy)
                                     {
                                         vcoul = qq*(int_bit*rinv + krsq - iconst->c_rf);
                                     }
@@ -281,7 +278,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
 
                                     fscal = qq*(int_bit*rinvsq - fexcl)*rinv;
 
-                                    if (bEner)
+                                    if (forceFlags.computeEnergy)
                                     {
                                         vcoul = qq*((int_bit - std::erf(iconst->ewaldcoeff_q*r))*rinv - int_bit*iconst->sh_ewald);
                                     }
@@ -300,7 +297,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
                                     Vvdw_rep  = c12*rinvsix*rinvsix;
                                     fscal    += (Vvdw_rep - Vvdw_disp)*rinvsq;
 
-                                    if (bEner)
+                                    if (forceFlags.computeEnergy)
                                     {
                                         vctot   += vcoul;
 
@@ -350,7 +347,7 @@ nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
             }
         }
 
-        if (bEner)
+        if (forceFlags.computeEnergy)
         {
             ggid             = 0;
             Vc[ggid]         = Vc[ggid]   + vctot;
index 9e744cde070e8cf489d572ae721e9471a43dade2..d26747e0c7e5664d67d3755f88c2718fcd7e56db 100644 (file)
 struct NbnxnPairlistGpu;
 struct nbnxn_atomdata_t;
 
+namespace gmx
+{
+class ForceFlags;
+}
+
 /* Reference (slow) kernel for nb n vs n GPU type pair lists */
 void
 nbnxn_kernel_gpu_ref(const NbnxnPairlistGpu     *nbl,
                      const nbnxn_atomdata_t     *nbat,
                      const interaction_const_t  *iconst,
                      rvec                       *shift_vec,
-                     int                         force_flags,
+                     const gmx::ForceFlags      &forceFlags,
                      int                         clearF,
                      gmx::ArrayRef<real>         f,
                      real  *                     fshift,
index 5a6ff9dea4a2aac597d38c3055cab4788e612b31..1a2ce8cc0c5b2dde9f5bc7a1161cbeae60e2de6a 100644 (file)
@@ -286,7 +286,6 @@ struct nonbonded_verlet_t
         //! \brief Executes the non-bonded kernel of the GPU or launches it on the GPU
         void dispatchNonbondedKernel(Nbnxm::InteractionLocality  iLocality,
                                      const interaction_const_t  &ic,
-                                     int                         legacyForceFlags,
                                      const gmx::ForceFlags      &forceFlags,
                                      int                         clearF,
                                      const t_forcerec           &fr,
@@ -302,7 +301,7 @@ struct nonbonded_verlet_t
                                       t_lambda                   *fepvals,
                                       real                       *lambda,
                                       gmx_enerdata_t             *enerd,
-                                      int                         forceFlags,
+                                      const gmx::ForceFlags      &forceFlags,
                                       t_nrnb                     *nrnb);
 
         /*! \brief Add the forces stored in nbat to f, zeros the forces in nbat