Store wcycle pointer in nonbonded_verlet_t
authorSzilárd Páll <pall.szilard@gmail.com>
Mon, 29 Jul 2019 16:24:46 +0000 (18:24 +0200)
committerArtem Zhmurov <zhmurov@gmail.com>
Wed, 31 Jul 2019 11:21:13 +0000 (13:21 +0200)
Removed wcycle arguments from method calls that no longer need it and
moved a few instances of cycle counting into the nbmxn module.

Change-Id: Ic5646b3bb85ed2c66137e9db7bd70822df95042b

src/gromacs/mdlib/forcerec.cpp
src/gromacs/mdlib/sim_util.cpp
src/gromacs/nbnxm/kerneldispatch.cpp
src/gromacs/nbnxm/nbnxm.cpp
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_setup.cpp
src/gromacs/nbnxm/prunekerneldispatch.cpp

index af151e519d316417449bd02bf0605a768457a740..db5bd9fd1df02e0e58eb6f252a0e78239fe155e8 100644 (file)
@@ -1980,7 +1980,7 @@ void init_forcerec(FILE                             *fp,
 
         fr->nbv = Nbnxm::init_nb_verlet(mdlog, bFEP_NonBonded, ir, fr,
                                         cr, hardwareInfo, deviceInfo,
-                                        mtop, box);
+                                        mtop, box, wcycle);
 
         if (useGpuForBonded)
         {
index e525fad2974fb2fda89edbbc6b9bca158520d32c..70a25665a2c574cfcbdc6258dd1d77655e5653d7 100644 (file)
@@ -346,7 +346,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
         }
     }
 
-    nbv->dispatchNonbondedKernel(ilocality, *ic, flags, clearF, *fr, enerd, nrnb, wcycle);
+    nbv->dispatchNonbondedKernel(ilocality, *ic, flags, clearF, *fr, enerd, nrnb);
 }
 
 static inline void clear_rvecs_omp(int n, rvec v[])
@@ -693,8 +693,7 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t                  *nbv
                 nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
                                               as_rvec_array(force->unpaddedArrayRef().data()),
                                               BufferOpsUseGpu::False,
-                                              GpuBufferOpsAccumulateForce::Null,
-                                              wcycle);
+                                              GpuBufferOpsAccumulateForce::Null);
             }
         }
     }
@@ -831,11 +830,7 @@ launchGpuEndOfStepTasks(nonbonded_verlet_t         *nbv,
          */
         if (nbv->isDynamicPruningStepGpu(step))
         {
-            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
-            wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
             nbv->dispatchPruneKernelGpu(step);
-            wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-            wallcycle_stop(wcycle, ewcLAUNCH_GPU);
         }
 
         /* now clear the GPU outputs while we finish the step on the CPU */
@@ -1108,13 +1103,13 @@ void do_force(FILE                                     *fplog,
         // NS step is also a virial step (on which f buf ops are deactivated).
         if (c_enableGpuBufOps && bUseGPU && (GMX_GPU == GMX_GPU_CUDA))
         {
-            nbv->atomdata_init_add_nbat_f_to_f_gpu(wcycle);
+            nbv->atomdata_init_add_nbat_f_to_f_gpu();
         }
     }
     else
     {
         nbv->setCoordinates(Nbnxm::AtomLocality::Local, false,
-                            x.unpaddedArrayRef(), useGpuXBufOps, pme_gpu_get_device_x(fr->pmedata), wcycle);
+                            x.unpaddedArrayRef(), useGpuXBufOps, pme_gpu_get_device_x(fr->pmedata));
     }
 
     if (bUseGPU)
@@ -1181,7 +1176,7 @@ void do_force(FILE                                     *fplog,
             dd_move_x(cr->dd, box, x.unpaddedArrayRef(), wcycle);
 
             nbv->setCoordinates(Nbnxm::AtomLocality::NonLocal, false,
-                                x.unpaddedArrayRef(), useGpuXBufOps, pme_gpu_get_device_x(fr->pmedata), wcycle);
+                                x.unpaddedArrayRef(), useGpuXBufOps, pme_gpu_get_device_x(fr->pmedata));
 
         }
 
@@ -1318,14 +1313,14 @@ void do_force(FILE                                     *fplog,
         nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local,
                                       fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f, *mdatoms,
                                       inputrec->fepvals, lambda.data(),
-                                      enerd, flags, nrnb, wcycle);
+                                      enerd, flags, nrnb);
 
         if (havePPDomainDecomposition(cr))
         {
             nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal,
                                           fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f, *mdatoms,
                                           inputrec->fepvals, lambda.data(),
-                                          enerd, flags, nrnb, wcycle);
+                                          enerd, flags, nrnb);
         }
     }
 
@@ -1344,8 +1339,7 @@ void do_force(FILE                                     *fplog,
         wallcycle_stop(wcycle, ewcFORCE);
         nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.f,
                                       BufferOpsUseGpu::False,
-                                      GpuBufferOpsAccumulateForce::Null,
-                                      wcycle);
+                                      GpuBufferOpsAccumulateForce::Null);
 
         wallcycle_start_nocount(wcycle, ewcFORCE);
 
@@ -1427,7 +1421,7 @@ void do_force(FILE                                     *fplog,
                 nbv->launch_copy_f_to_gpu(forceOut.f, Nbnxm::AtomLocality::NonLocal);
             }
             nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal,
-                                          forceOut.f, useGpuFBufOps, accumulateForce, wcycle);
+                                          forceOut.f, useGpuFBufOps, accumulateForce);
             if (useGpuFBufOps == BufferOpsUseGpu::True)
             {
                 nbv->launch_copy_f_from_gpu(forceOut.f, Nbnxm::AtomLocality::NonLocal);
@@ -1536,7 +1530,7 @@ void do_force(FILE                                     *fplog,
             nbv->launch_copy_f_to_gpu(forceOut.f, Nbnxm::AtomLocality::Local);
         }
         nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
-                                      forceOut.f, useGpuFBufOps, accumulateForce, wcycle);
+                                      forceOut.f, useGpuFBufOps, accumulateForce);
         if (useGpuFBufOps == BufferOpsUseGpu::True)
         {
             nbv->launch_copy_f_from_gpu(forceOut.f, Nbnxm::AtomLocality::Local);
index 99b96d11b6fce7fd9c8ac9c2fd2187e958841873..a600029a2b14977bbe062eda09dfe796b4ef5640 100644 (file)
@@ -467,8 +467,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
                                             int                        clearF,
                                             const t_forcerec          &fr,
                                             gmx_enerdata_t            *enerd,
-                                            t_nrnb                    *nrnb,
-                                            gmx_wallcycle             *wcycle)
+                                            t_nrnb                    *nrnb)
 {
     const PairlistSet &pairlistSet = pairlistSets().pairlistSet(iLocality);
 
@@ -488,7 +487,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
                              fr.bBHAM ?
                              enerd->grpp.ener[egBHAMSR].data() :
                              enerd->grpp.ener[egLJSR].data(),
-                             wcycle);
+                             wcycle_);
             break;
 
         case Nbnxm::KernelType::Gpu8x8x8:
@@ -527,8 +526,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
                                              real                       *lambda,
                                              gmx_enerdata_t             *enerd,
                                              const int                   forceFlags,
-                                             t_nrnb                     *nrnb,
-                                             gmx_wallcycle              *wcycle)
+                                             t_nrnb                     *nrnb)
 {
     const auto nbl_fep = pairlistSets().pairlistSet(iLocality).fepLists();
 
@@ -567,7 +565,7 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
 
     GMX_ASSERT(gmx_omp_nthreads_get(emntNonbonded) == nbl_fep.ssize(), "Number of lists should be same as number of NB threads");
 
-    wallcycle_sub_start(wcycle, ewcsNONBONDED_FEP);
+    wallcycle_sub_start(wcycle_, ewcsNONBONDED_FEP);
 #pragma omp parallel for schedule(static) num_threads(nbl_fep.ssize())
     for (int th = 0; th < nbl_fep.ssize(); th++)
     {
@@ -624,5 +622,5 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocali
             enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
         }
     }
-    wallcycle_sub_stop(wcycle, ewcsNONBONDED_FEP);
+    wallcycle_sub_stop(wcycle_, ewcsNONBONDED_FEP);
 }
index 681dcf2d8bbc8c36ee72743ca25b25d5810ccbbc..77119044cbcd08cc30362feb737b8c6e5ce4d0ea 100644 (file)
@@ -136,11 +136,10 @@ void nonbonded_verlet_t::setCoordinates(const Nbnxm::AtomLocality       locality
                                         const bool                      fillLocal,
                                         gmx::ArrayRef<const gmx::RVec>  x,
                                         BufferOpsUseGpu                 useGpu,
-                                        void                           *xPmeDevicePtr,
-                                        gmx_wallcycle                  *wcycle)
+                                        void                           *xPmeDevicePtr)
 {
-    wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-    wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
+    wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_start(wcycle_, ewcsNB_X_BUF_OPS);
 
     auto fnPtr = (useGpu == BufferOpsUseGpu::True) ?
         nbnxn_atomdata_copy_x_to_nbat_x<true> :
@@ -150,8 +149,8 @@ void nonbonded_verlet_t::setCoordinates(const Nbnxm::AtomLocality       locality
           as_rvec_array(x.data()),
           nbat.get(), gpu_nbv, xPmeDevicePtr);
 
-    wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
-    wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_stop(wcycle_, ewcsNB_X_BUF_OPS);
+    wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
 }
 
 gmx::ArrayRef<const int> nonbonded_verlet_t::getGridIndices() const
@@ -163,8 +162,7 @@ void
 nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality           locality,
                                              rvec                               *f,
                                              BufferOpsUseGpu                     useGpu,
-                                             GpuBufferOpsAccumulateForce         accumulateForce,
-                                             gmx_wallcycle                      *wcycle)
+                                             GpuBufferOpsAccumulateForce         accumulateForce)
 {
 
     GMX_ASSERT(!((useGpu == BufferOpsUseGpu::False) &&
@@ -178,22 +176,22 @@ nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality
         return;
     }
 
-    wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-    wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
+    wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_start(wcycle_, ewcsNB_F_BUF_OPS);
 
     auto fn = useGpu == BufferOpsUseGpu::True ? reduceForces<true> : reduceForces<false>;
     fn(nbat.get(), locality, pairSearch_->gridSet(), f, gpu_nbv, accumulateForce);
 
-    wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-    wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_stop(wcycle_, ewcsNB_F_BUF_OPS);
+    wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
 }
 
 void
-nonbonded_verlet_t::atomdata_init_add_nbat_f_to_f_gpu(gmx_wallcycle *wcycle)
+nonbonded_verlet_t::atomdata_init_add_nbat_f_to_f_gpu()
 {
 
-    wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-    wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
+    wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_start(wcycle_, ewcsNB_F_BUF_OPS);
 
     const Nbnxm::GridSet      &gridSet = pairSearch_->gridSet();
 
@@ -201,8 +199,8 @@ nonbonded_verlet_t::atomdata_init_add_nbat_f_to_f_gpu(gmx_wallcycle *wcycle)
                                           gpu_nbv,
                                           gridSet.numRealAtomsTotal());
 
-    wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-    wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_stop(wcycle_, ewcsNB_F_BUF_OPS);
+    wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
 }
 
 real nonbonded_verlet_t::pairlistInnerRadius() const
index 7a6491a4aac4538d34e0819d022b94a12c4fdbce..a749b074b9f468f8333519cf79799705b984f4a5 100644 (file)
@@ -212,7 +212,8 @@ struct nonbonded_verlet_t
                            std::unique_ptr<PairSearch>        pairSearch,
                            std::unique_ptr<nbnxn_atomdata_t>  nbat,
                            const Nbnxm::KernelSetup          &kernelSetup,
-                           gmx_nbnxn_gpu_t                   *gpu_nbv);
+                           gmx_nbnxn_gpu_t                   *gpu_nbv,
+                           gmx_wallcycle                     *wcycle);
 
         ~nonbonded_verlet_t();
 
@@ -261,8 +262,7 @@ struct nonbonded_verlet_t
                             bool                            fillLocal,
                             gmx::ArrayRef<const gmx::RVec>  x,
                             BufferOpsUseGpu                 useGpu,
-                            void                           *xPmeDevicePtr,
-                            gmx_wallcycle                  *wcycle);
+                            void                           *xPmeDevicePtr);
 
         //! Init for GPU version of setup coordinates in Nbnxm
         void atomdata_init_copy_x_to_nbat_x_gpu();
@@ -296,8 +296,7 @@ struct nonbonded_verlet_t
                                      int                         clearF,
                                      const t_forcerec           &fr,
                                      gmx_enerdata_t             *enerd,
-                                     t_nrnb                     *nrnb,
-                                     gmx_wallcycle              *wcycle);
+                                     t_nrnb                     *nrnb);
 
         //! Executes the non-bonded free-energy kernel, always runs on the CPU
         void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality  iLocality,
@@ -309,18 +308,16 @@ struct nonbonded_verlet_t
                                       real                       *lambda,
                                       gmx_enerdata_t             *enerd,
                                       int                         forceFlags,
-                                      t_nrnb                     *nrnb,
-                                      gmx_wallcycle              *wcycle);
+                                      t_nrnb                     *nrnb);
 
         //! Add the forces stored in nbat to f, zeros the forces in nbat */
         void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality                 locality,
                                       rvec                               *f,
                                       BufferOpsUseGpu                     useGpu,
-                                      GpuBufferOpsAccumulateForce         accumulateForce,
-                                      gmx_wallcycle                      *wcycle);
+                                      GpuBufferOpsAccumulateForce         accumulateForce);
 
         /*! \brief Outer body of function to perform initialization for F buffer operations on GPU. */
-        void atomdata_init_add_nbat_f_to_f_gpu(gmx_wallcycle *wcycle);
+        void atomdata_init_add_nbat_f_to_f_gpu();
 
         /*! \brief H2D transfer of force buffer*/
         void launch_copy_f_to_gpu(rvec *f, Nbnxm::AtomLocality locality);
@@ -378,6 +375,8 @@ struct nonbonded_verlet_t
     private:
         //! The non-bonded setup, also affects the pairlist construction kernel
         Nbnxm::KernelSetup                kernelSetup_;
+        //! \brief Pointer to wallcycle structure.
+        gmx_wallcycle                    *wcycle_;
     public:
         //! GPU Nbnxm data, only used with a physical GPU (TODO: use unique_ptr)
         gmx_nbnxn_gpu_t                  *gpu_nbv;
@@ -396,7 +395,8 @@ init_nb_verlet(const gmx::MDLogger     &mdlog,
                const gmx_hw_info_t     &hardwareInfo,
                const gmx_device_info_t *deviceInfo,
                const gmx_mtop_t        *mtop,
-               matrix                   box);
+               matrix                   box,
+               gmx_wallcycle           *wcycle);
 
 } // namespace Nbnxm
 
index 18c8182dd5c944865856034b83581eaa51bcb7db..6715fef9c2c5cca5936fbd9d140159110c7896ac 100644 (file)
@@ -364,7 +364,8 @@ init_nb_verlet(const gmx::MDLogger     &mdlog,
                const gmx_hw_info_t     &hardwareInfo,
                const gmx_device_info_t *deviceInfo,
                const gmx_mtop_t        *mtop,
-               matrix                   box)
+               matrix                   box,
+               gmx_wallcycle           *wcycle)
 {
     const bool          emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr);
     const bool          useGpu     = deviceInfo != nullptr;
@@ -482,7 +483,8 @@ init_nb_verlet(const gmx::MDLogger     &mdlog,
                                                 std::move(pairSearch),
                                                 std::move(nbat),
                                                 kernelSetup,
-                                                gpu_nbv);
+                                                gpu_nbv,
+                                                wcycle);
 }
 
 } // namespace Nbnxm
@@ -491,11 +493,13 @@ nonbonded_verlet_t::nonbonded_verlet_t(std::unique_ptr<PairlistSets>      pairli
                                        std::unique_ptr<PairSearch>        pairSearch,
                                        std::unique_ptr<nbnxn_atomdata_t>  nbat_in,
                                        const Nbnxm::KernelSetup          &kernelSetup,
-                                       gmx_nbnxn_gpu_t                   *gpu_nbv_ptr) :
+                                       gmx_nbnxn_gpu_t                   *gpu_nbv_ptr,
+                                       gmx_wallcycle                     *wcycle) :
     pairlistSets_(std::move(pairlistSets)),
     pairSearch_(std::move(pairSearch)),
     nbat(std::move(nbat_in)),
     kernelSetup_(kernelSetup),
+    wcycle_(wcycle),
     gpu_nbv(gpu_nbv_ptr)
 {
     GMX_RELEASE_ASSERT(pairlistSets_, "Need valid pairlistSets");
index 899b9489be6af794447aebaebf98b4fac1f85bd9..cc5e3d08d909826a632522236004e85e5e39004b 100644 (file)
@@ -37,6 +37,7 @@
 
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
 #include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/gmxassert.h"
 
 #include "clusterdistancekerneltype.h"
@@ -97,9 +98,15 @@ nonbonded_verlet_t::dispatchPruneKernelCpu(const Nbnxm::InteractionLocality  iLo
 
 void nonbonded_verlet_t::dispatchPruneKernelGpu(int64_t step)
 {
+    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_start_nocount(wcycle_, ewcsLAUNCH_GPU_NONBONDED);
+
     const bool stepIsEven = (pairlistSets().numStepsWithPairlist(step) % 2 == 0);
 
     Nbnxm::gpu_launch_kernel_pruneonly(gpu_nbv,
                                        stepIsEven ? Nbnxm::InteractionLocality::Local : Nbnxm::InteractionLocality::NonLocal,
                                        pairlistSets().params().numRollingPruningParts);
+
+    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_NONBONDED);
+    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
 }