Create dedicated subcounter for nonbonded FEP
author Szilárd Páll <pall.szilard@gmail.com>
Thu, 27 Jun 2019 15:26:50 +0000 (17:26 +0200)
committer Mark Abraham <mark.j.abraham@gmail.com>
Sun, 30 Jun 2019 22:01:14 +0000 (00:01 +0200)
Now each nonbonded task has its own separate subcounter, which allows
measuring the performance of each task separately.

Refs #2997

Change-Id: I601445364592923d08087a858da4629b0b58ae76

src/gromacs/mdlib/sim_util.cpp
src/gromacs/nbnxm/kerneldispatch.cpp
src/gromacs/nbnxm/nbnxm.h
src/gromacs/timing/wallcycle.cpp
src/gromacs/timing/wallcycle.h

index 8cb7e5a2c2134b994d01fdb4ece0d48ee455a3c6..baecb96ba1d9e562842b76d7da9996440a6cf627 100644 (file)
@@ -346,16 +346,9 @@ static void do_nb_verlet(t_forcerec                       *fr,
             nbv->dispatchPruneKernelCpu(ilocality, fr->shift_vec);
             wallcycle_sub_stop(wcycle, ewcsNONBONDED_PRUNING);
         }
-
-        wallcycle_sub_start(wcycle, ewcsNONBONDED);
     }
 
     nbv->dispatchNonbondedKernel(ilocality, *ic, flags, clearF, *fr, enerd, nrnb, wcycle);
-
-    if (!nbv->useGpu())
-    {
-        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
-    }
 }
 
 static inline void clear_rvecs_omp(int n, rvec v[])
@@ -1259,20 +1252,18 @@ void do_force(FILE                                     *fplog,
         /* Calculate the local and non-local free energy interactions here.
          * Happens here on the CPU both with and without GPU.
          */
-        wallcycle_sub_start(wcycle, ewcsNONBONDED);
         nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local,
                                       fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f, *mdatoms,
                                       inputrec->fepvals, lambda.data(),
-                                      enerd, flags, nrnb);
+                                      enerd, flags, nrnb, wcycle);
 
         if (havePPDomainDecomposition(cr))
         {
             nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal,
                                           fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f, *mdatoms,
                                           inputrec->fepvals, lambda.data(),
-                                          enerd, flags, nrnb);
+                                          enerd, flags, nrnb, wcycle);
         }
-        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
     }
 
     if (!bUseOrEmulGPU)
index 52bb7c2e823caf1bd49407a6a9f3db7b5c89c1f7..99b96d11b6fce7fd9c8ac9c2fd2187e958841873 100644 (file)
@@ -241,7 +241,7 @@ nbnxn_kernel_cpu(const PairlistSet              &pairlistSet,
     gmx::ArrayRef<const NbnxnPairlistCpu> pairlists = pairlistSet.cpuLists();
 
     int gmx_unused                        nthreads = gmx_omp_nthreads_get(emntNonbonded);
-    wallcycle_sub_start(wcycle, ewcsNBFCLEARBUF);
+    wallcycle_sub_start(wcycle, ewcsNONBONDED_CLEAR);
 #pragma omp parallel for schedule(static) num_threads(nthreads)
     for (int nb = 0; nb < pairlists.ssize(); nb++)
     {
@@ -258,8 +258,8 @@ nbnxn_kernel_cpu(const PairlistSet              &pairlistSet,
 
         if (nb == 0)
         {
-            wallcycle_sub_stop(wcycle, ewcsNBFCLEARBUF);
-            wallcycle_sub_start(wcycle, ewcsNBFKERNEL);
+            wallcycle_sub_stop(wcycle, ewcsNONBONDED_CLEAR);
+            wallcycle_sub_start(wcycle, ewcsNONBONDED_KERNEL);
         }
 
         // TODO: Change to reference
@@ -393,7 +393,7 @@ nbnxn_kernel_cpu(const PairlistSet              &pairlistSet,
             }
         }
     }
-    wallcycle_sub_stop(wcycle, ewcsNBFKERNEL);
+    wallcycle_sub_stop(wcycle, ewcsNONBONDED_KERNEL);
 
     if (forceFlags & GMX_FORCE_ENERGY)
     {
@@ -527,7 +527,8 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
                                              real                       *lambda,
                                              gmx_enerdata_t             *enerd,
                                              const int                   forceFlags,
-                                             t_nrnb                     *nrnb)
+                                             t_nrnb                     *nrnb,
+                                             gmx_wallcycle              *wcycle)
 {
     const auto nbl_fep = pairlistSets().pairlistSet(iLocality).fepLists();
 
@@ -566,6 +567,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
 
     GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_fep.ssize(), "Number of lists should be same as number of NB threads");
 
+    wallcycle_sub_start(wcycle, ewcsNONBONDED_FEP);
 #pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
     for (int th = 0; th < nbl_fep.ssize(); th++)
     {
@@ -622,4 +624,5 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
             enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
         }
     }
+    wallcycle_sub_stop(wcycle, ewcsNONBONDED_FEP);
 }
index 3c2d1f6af1a322450d05aa760b4bf5645337b1e1..b8bdb853cb631c447d69cf8fa646df29753eb4ed 100644 (file)
@@ -291,7 +291,8 @@ struct nonbonded_verlet_t
                                       real                       *lambda,
                                       gmx_enerdata_t             *enerd,
                                       int                         forceFlags,
-                                      t_nrnb                     *nrnb);
+                                      t_nrnb                     *nrnb,
+                                      gmx_wallcycle              *wcycle);
 
         //! Add the forces stored in nbat to f, zeros the forces in nbat */
         void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality  locality,
index ffe8b1eaa2fb3da767ab185274cecc299d3465ba..e558a4b4456b64a4e86e1ee0db9c63ac15fbaa6b 100644 (file)
@@ -126,8 +126,9 @@ static const char *wcsn[ewcsNR] =
     "Restraints F",
     "Listed buffer ops.",
     "Nonbonded pruning",
-    "Nonbonded F",
-    "NB F kernel", "NB F clear buf",
+    "Nonbonded F kernel",
+    "Nonbonded F clear",
+    "Nonbonded FEP",
     "Launch NB GPU tasks",
     "Launch Bonded GPU tasks",
     "Launch PME GPU tasks",
index 58d1e500330d38b94bf5b20957650cc385fed3ff..f2377ebe5ca787f0d0081d8fc415d8f4556b8298 100644 (file)
@@ -72,8 +72,9 @@ enum {
     ewcsRESTRAINTS,
     ewcsLISTED_BUF_OPS,
     ewcsNONBONDED_PRUNING,
-    ewcsNONBONDED,
-    ewcsNBFKERNEL, ewcsNBFCLEARBUF,
+    ewcsNONBONDED_KERNEL,
+    ewcsNONBONDED_CLEAR,
+    ewcsNONBONDED_FEP,
     ewcsLAUNCH_GPU_NONBONDED,
     ewcsLAUNCH_GPU_BONDED,
     ewcsLAUNCH_GPU_PME,