Clean up wallcycle calls in do_force_cutsVERLET
authorPrashanth Kanduri <kanduri@cscs.ch>
Mon, 18 Dec 2017 09:05:04 +0000 (10:05 +0100)
committerPrashanth Kanduri <kanduri@cscs.ch>
Wed, 7 Feb 2018 14:17:37 +0000 (15:17 +0100)
Change-Id: I50bead1f0760e2f61f5002d33bba8fc6e85692b5

14 files changed:
src/gromacs/domdec/domdec.cpp
src/gromacs/domdec/domdec.h
src/gromacs/ewald/pme-gpu.cpp
src/gromacs/ewald/pme-only.cpp
src/gromacs/ewald/pme-pp.cpp
src/gromacs/ewald/pme.cpp
src/gromacs/ewald/pme.h
src/gromacs/mdlib/nbnxn_atomdata.cpp
src/gromacs/mdlib/nbnxn_atomdata.h
src/gromacs/mdlib/sim_util.cpp
src/gromacs/mdlib/vsite.cpp
src/gromacs/mdlib/vsite.h
src/gromacs/timing/wallcycle.cpp
src/gromacs/timing/wallcycle.h

index bb90e31364ccca0303f19e7ca61bc8f4339d5eb2..f3c8eb9d0d9069f368e3f56888b1897b1695e526 100644 (file)
@@ -390,8 +390,10 @@ void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
     *at_end   = dd->comm->nat[ddnatCON];
 }
 
-void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
+void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[], gmx_wallcycle *wcycle)
 {
+    wallcycle_start(wcycle, ewcMOVEX);
+
     int                    nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
     int                   *index, *cgindex;
     gmx_domdec_comm_t     *comm;
@@ -498,10 +500,14 @@ void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
         }
         nzone += nzone;
     }
+
+    wallcycle_stop(wcycle, ewcMOVEX);
 }
 
-void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
+void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift, gmx_wallcycle *wcycle)
 {
+    wallcycle_start(wcycle, ewcMOVEF);
+
     int                    nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
     int                   *index, *cgindex;
     gmx_domdec_comm_t     *comm;
@@ -619,6 +625,7 @@ void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
         }
         nzone /= 2;
     }
+    wallcycle_stop(wcycle, ewcMOVEF);
 }
 
 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
@@ -9195,7 +9202,7 @@ void dd_partition_system(FILE                *fplog,
                          gmx_vsite_t         *vsite,
                          gmx_constr_t         constr,
                          t_nrnb              *nrnb,
-                         gmx_wallcycle_t      wcycle,
+                         gmx_wallcycle       *wcycle,
                          gmx_bool             bVerbose)
 {
     gmx_domdec_t      *dd;
@@ -9808,7 +9815,7 @@ void dd_partition_system(FILE                *fplog,
 
     if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
     {
-        dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()));
+        dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()), nullWallcycle);
         write_dd_pdb("dd_dump", step, "dump", top_global, cr,
                      -1, as_rvec_array(state_local->x.data()), state_local->box);
     }
index 81f836ab3439d274615e891b168714a194040f9b..d058c2f78a366f02b8a5cf60f0425e5cadaac046 100644 (file)
@@ -62,7 +62,6 @@
 
 #include "gromacs/math/paddedvector.h"
 #include "gromacs/math/vectypes.h"
-#include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/real.h"
@@ -82,6 +81,7 @@ struct t_forcerec;
 struct t_inputrec;
 struct t_mdatoms;
 struct t_nrnb;
+struct gmx_wallcycle;
 class t_state;
 
 namespace gmx
@@ -288,14 +288,14 @@ void dd_force_flop_stop(struct gmx_domdec_t *dd, t_nrnb *nrnb);
 float dd_pme_f_ratio(struct gmx_domdec_t *dd);
 
 /*! \brief Communicate the coordinates to the neighboring cells and do pbc. */
-void dd_move_x(struct gmx_domdec_t *dd, matrix box, rvec x[]);
+void dd_move_x(struct gmx_domdec_t *dd, matrix box, rvec x[], gmx_wallcycle *wcycle);
 
 /*! \brief Sum the forces over the neighboring cells.
  *
  * When fshift!=NULL the shift forces are updated to obtain
  * the correct virial from the single sum including f.
  */
-void dd_move_f(struct gmx_domdec_t *dd, rvec f[], rvec *fshift);
+void dd_move_f(struct gmx_domdec_t *dd, rvec f[], rvec *fshift, gmx_wallcycle *wcycle);
 
 /*! \brief Communicate a real for each atom to the neighboring cells. */
 void dd_atom_spread_real(struct gmx_domdec_t *dd, real v[]);
@@ -326,7 +326,7 @@ void dd_partition_system(FILE                *fplog,
                          gmx_vsite_t         *vsite,
                          struct gmx_constr   *constr,
                          t_nrnb              *nrnb,
-                         gmx_wallcycle_t      wcycle,
+                         gmx_wallcycle       *wcycle,
                          gmx_bool             bVerbose);
 
 /*! \brief Reset all the statistics and counters for total run counting */
index ab13d44db58e6168f2f955ff0361e3e3f00f8e90..ba53868eb3cdfbe71888d0d4b6de7d2c8e1ef8ac 100644 (file)
@@ -170,7 +170,7 @@ void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t              *pme,
 void pme_gpu_prepare_computation(gmx_pme_t            *pme,
                                  bool                  needToUpdateBox,
                                  const matrix          box,
-                                 gmx_wallcycle_t       wcycle,
+                                 gmx_wallcycle        *wcycle,
                                  int                   flags)
 {
     GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
@@ -213,7 +213,7 @@ void pme_gpu_prepare_computation(gmx_pme_t            *pme,
 
 void pme_gpu_launch_spread(gmx_pme_t            *pme,
                            const rvec           *x,
-                           gmx_wallcycle_t       wcycle)
+                           gmx_wallcycle        *wcycle)
 {
     GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
 
@@ -242,7 +242,7 @@ void pme_gpu_launch_spread(gmx_pme_t            *pme,
 }
 
 void pme_gpu_launch_complex_transforms(gmx_pme_t      *pme,
-                                       gmx_wallcycle_t wcycle)
+                                       gmx_wallcycle  *wcycle)
 {
     PmeGpu            *pmeGpu                 = pme->gpu;
     const bool         computeEnergyAndVirial = pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR;
@@ -298,7 +298,7 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t      *pme,
 }
 
 void pme_gpu_launch_gather(const gmx_pme_t                 *pme,
-                           gmx_wallcycle_t gmx_unused       wcycle,
+                           gmx_wallcycle gmx_unused        *wcycle,
                            PmeForceOutputHandling           forceTreatment)
 {
     GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
@@ -346,7 +346,7 @@ static void pme_gpu_get_staged_results(const gmx_pme_t                *pme,
 }
 
 bool pme_gpu_try_finish_task(const gmx_pme_t                *pme,
-                             gmx_wallcycle_t                 wcycle,
+                             gmx_wallcycle                  *wcycle,
                              gmx::ArrayRef<const gmx::RVec> *forces,
                              matrix                          virial,
                              real                           *energy,
@@ -391,7 +391,7 @@ bool pme_gpu_try_finish_task(const gmx_pme_t                *pme,
 }
 
 void pme_gpu_wait_finish_task(const gmx_pme_t                *pme,
-                              gmx_wallcycle_t                 wcycle,
+                              gmx_wallcycle                  *wcycle,
                               gmx::ArrayRef<const gmx::RVec> *forces,
                               matrix                          virial,
                               real                           *energy)
index 509744d934654e85f8ea806adf792cb27e9a012b..34d81fafe900dcae70cc4e5457fd7f61d107c4f9 100644 (file)
@@ -534,7 +534,7 @@ static void gmx_pme_send_force_vir_ener(gmx_pme_pp *pme_pp,
 
 int gmx_pmeonly(struct gmx_pme_t *pme,
                 t_commrec *cr,    t_nrnb *mynrnb,
-                gmx_wallcycle_t wcycle,
+                gmx_wallcycle  *wcycle,
                 gmx_walltime_accounting_t walltime_accounting,
                 t_inputrec *ir, PmeRunMode runMode)
 {
index 837cc1d1afa5b384f0f0392545b1252c929cf7a3..67ee1d773d4af02a39931c3e14e0c669e433b16e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -61,6 +61,7 @@
 #include "gromacs/mdtypes/forceoutput.h"
 #include "gromacs/mdtypes/interaction_const.h"
 #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxmpi.h"
 #include "gromacs/utility/smalloc.h"
@@ -246,8 +247,10 @@ void gmx_pme_send_parameters(t_commrec *cr,
 void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
                               real lambda_q, real lambda_lj,
                               gmx_bool bEnerVir,
-                              gmx_int64_t step)
+                              gmx_int64_t step, gmx_wallcycle *wcycle)
 {
+    wallcycle_start(wcycle, ewcPP_PMESENDX);
+
     unsigned int flags = PP_PME_COORD;
     if (bEnerVir)
     {
@@ -255,6 +258,8 @@ void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
     }
     gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
                                box, x, lambda_q, lambda_lj, 0, 0, step);
+
+    wallcycle_stop(wcycle, ewcPP_PMESENDX);
 }
 
 void gmx_pme_send_finish(t_commrec *cr)
index fe2f007a0567634ecf0e56f54c03713a390e7c49..0f64912941405b96f8109b44d4f9171b301b22df 100644 (file)
@@ -965,7 +965,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                real sigmaA[],   real sigmaB[],
                matrix box,      t_commrec *cr,
                int  maxshift_x, int maxshift_y,
-               t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
+               t_nrnb *nrnb,    gmx_wallcycle *wcycle,
                matrix vir_q,    matrix vir_lj,
                real *energy_q,  real *energy_lj,
                real lambda_q,   real lambda_lj,
index d8d8838ddb0bc3cfa3a12422ad3796aef3d35b16..4b9972cfbc6cec44582b7bb359ef05f97fcaa9af 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,7 +51,6 @@
 #include <string>
 
 #include "gromacs/math/vectypes.h"
-#include "gromacs/timing/wallcycle.h"
 #include "gromacs/timing/walltime_accounting.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/basedefinitions.h"
@@ -65,6 +64,7 @@ struct PmeGpu;
 struct gmx_wallclock_gpu_pme_t;
 struct gmx_device_info_t;
 struct gmx_pme_t;
+struct gmx_wallcycle;
 
 enum class GpuTaskCompletion;
 
@@ -167,7 +167,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                real sigmaA[],   real sigmaB[],
                matrix box,      t_commrec *cr,
                int  maxshift_x, int maxshift_y,
-               t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
+               t_nrnb *nrnb,    gmx_wallcycle *wcycle,
                matrix vir_q,    matrix vir_lj,
                real *energy_q,  real *energy_lj,
                real lambda_q,   real lambda_lj,
@@ -177,7 +177,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
 /*! \brief Called on the nodes that do PME exclusively (as slaves) */
 int gmx_pmeonly(struct gmx_pme_t *pme,
                 struct t_commrec *cr,     t_nrnb *mynrnb,
-                gmx_wallcycle_t wcycle,
+                gmx_wallcycle  *wcycle,
                 gmx_walltime_accounting_t walltime_accounting,
                 t_inputrec *ir, PmeRunMode runMode);
 
@@ -204,7 +204,7 @@ void gmx_pme_send_parameters(struct t_commrec *cr,
 void gmx_pme_send_coordinates(struct t_commrec *cr, matrix box, rvec *x,
                               real lambda_q, real lambda_lj,
                               gmx_bool bEnerVir,
-                              gmx_int64_t step);
+                              gmx_int64_t step, gmx_wallcycle *wcycle);
 
 /*! \brief Tell our PME-only node to finish */
 void gmx_pme_send_finish(struct t_commrec *cr);
@@ -296,7 +296,7 @@ void pme_gpu_get_timings(const gmx_pme_t         *pme,
 void pme_gpu_prepare_computation(gmx_pme_t      *pme,
                                  bool            needToUpdateBox,
                                  const matrix    box,
-                                 gmx_wallcycle_t wcycle,
+                                 gmx_wallcycle  *wcycle,
                                  int             flags);
 
 /*! \brief
@@ -308,7 +308,7 @@ void pme_gpu_prepare_computation(gmx_pme_t      *pme,
  */
 void pme_gpu_launch_spread(gmx_pme_t      *pme,
                            const rvec     *x,
-                           gmx_wallcycle_t wcycle);
+                           gmx_wallcycle  *wcycle);
 
 /*! \brief
  * Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.
@@ -317,7 +317,7 @@ void pme_gpu_launch_spread(gmx_pme_t      *pme,
  * \param[in] wcycle            The wallclock counter.
  */
 void pme_gpu_launch_complex_transforms(gmx_pme_t       *pme,
-                                       gmx_wallcycle_t  wcycle);
+                                       gmx_wallcycle   *wcycle);
 
 /*! \brief
  * Launches last stage of PME on GPU - force gathering and D2H force transfer.
@@ -329,7 +329,7 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t       *pme,
  *                               and accumulates. The reduction is non-atomic.
  */
 void pme_gpu_launch_gather(const gmx_pme_t        *pme,
-                           gmx_wallcycle_t         wcycle,
+                           gmx_wallcycle          *wcycle,
                            PmeForceOutputHandling  forceTreatment);
 
 /*! \brief
@@ -343,7 +343,7 @@ void pme_gpu_launch_gather(const gmx_pme_t        *pme,
  * \param[out] energy         The output energy.
  */
 void pme_gpu_wait_finish_task(const gmx_pme_t                *pme,
-                              gmx_wallcycle_t                 wcycle,
+                              gmx_wallcycle                  *wcycle,
                               gmx::ArrayRef<const gmx::RVec> *forces,
                               matrix                          virial,
                               real                           *energy);
@@ -369,7 +369,7 @@ void pme_gpu_wait_finish_task(const gmx_pme_t                *pme,
  * \returns                   True if the PME GPU tasks have completed
  */
 bool pme_gpu_try_finish_task(const gmx_pme_t                *pme,
-                             gmx_wallcycle_t                 wcycle,
+                             gmx_wallcycle                  *wcycle,
                              gmx::ArrayRef<const gmx::RVec> *forces,
                              matrix                          virial,
                              real                           *energy,
index 5c45f4b496448d67f6dce94c53c2dd1f76df32a2..1d37c91145bf51f75d057adac2e86ccf7bb508ec 100644 (file)
@@ -60,6 +60,7 @@
 #include "gromacs/mdtypes/mdatom.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/simd/simd.h"
+#include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxomp.h"
@@ -1059,8 +1060,12 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs,
                                      int                  locality,
                                      gmx_bool             FillLocal,
                                      rvec                *x,
-                                     nbnxn_atomdata_t    *nbat)
+                                     nbnxn_atomdata_t    *nbat,
+                                     gmx_wallcycle       *wcycle)
 {
+    wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
+
     int g0 = 0, g1 = 0;
     int nth, th;
 
@@ -1129,6 +1134,9 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs,
         }
         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
     }
+
+    wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
+    wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 }
 
 static void
@@ -1516,8 +1524,12 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nba
 void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t    nbs,
                                     int                     locality,
                                     const nbnxn_atomdata_t *nbat,
-                                    rvec                   *f)
+                                    rvec                   *f,
+                                    gmx_wallcycle          *wcycle)
 {
+    wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
+
     int a0 = 0, na = 0;
 
     nbs_cycle_start(&nbs->cc[enbsCCreducef]);
@@ -1575,6 +1587,9 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t    nbs,
     }
 
     nbs_cycle_stop(&nbs->cc[enbsCCreducef]);
+
+    wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
+    wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 }
 
 /* Adds the shift forces from nbnxn_atomdata_t to fshift */
index d1d594e9e5f15915ebe405d6a9b8b860a0e4e288..d6c7eaf017b9e904afb72917589b8b81fd7719c9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -49,6 +49,7 @@ class MDLogger;
 }
 
 struct t_mdatoms;
+struct gmx_wallcycle;
 
 /* Default nbnxn allocation routine, allocates 32 byte aligned,
  * which works for plain C and aligned SSE and AVX loads/stores.
@@ -112,13 +113,15 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs,
                                      int                  locality,
                                      gmx_bool             FillLocal,
                                      rvec                *x,
-                                     nbnxn_atomdata_t    *nbat);
+                                     nbnxn_atomdata_t    *nbat,
+                                     gmx_wallcycle       *wcycle);
 
 /* Add the forces stored in nbat to f, zeros the forces in nbat */
 void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t    nbs,
                                     int                     locality,
                                     const nbnxn_atomdata_t *nbat,
-                                    rvec                   *f);
+                                    rvec                   *f,
+                                    gmx_wallcycle          *wcycle);
 
 /* Add the fshift force stored in nbat to fshift */
 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
index 5038cb6fc2a29b0e13cbbb68a994729c1ffda29e..483f10e0637469247fdfd91267c0dc87fa701519 100644 (file)
@@ -384,14 +384,12 @@ static void post_process_forces(t_commrec *cr,
              * This is parallellized. MPI communication is performed
              * if the constructing atoms aren't local.
              */
-            wallcycle_start(wcycle, ewcVSITESPREAD);
             matrix virial = { { 0 } };
             spread_vsite_f(vsite, x, fDirectVir, nullptr,
                            (flags & GMX_FORCE_VIRIAL), virial,
                            nrnb,
-                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
+                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
             forceWithVirial->addVirialContribution(virial);
-            wallcycle_stop(wcycle, ewcVSITESPREAD);
         }
 
         if (flags & GMX_FORCE_VIRIAL)
@@ -991,12 +989,8 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t             *nbv,
                 wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
                 wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
 
-                wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-                wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
                 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
-                                               nbv->nbat, as_rvec_array(force->data()));
-                wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-                wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+                                               nbv->nbat, as_rvec_array(force->data()), wcycle);
             }
         }
     }
@@ -1154,12 +1148,10 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
          * and domain decomposition does not use the graph,
          * we do not need to worry about shifting.
          */
-        wallcycle_start(wcycle, ewcPP_PMESENDX);
         gmx_pme_send_coordinates(cr, box, as_rvec_array(x.data()),
                                  lambda[efptCOUL], lambda[efptVDW],
                                  (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
-                                 step);
-        wallcycle_stop(wcycle, ewcPP_PMESENDX);
+                                 step, wcycle);
     }
 #endif /* GMX_MPI */
 
@@ -1256,12 +1248,8 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
     }
     else
     {
-        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-        wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
         nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE, as_rvec_array(x.data()),
-                                        nbv->nbat);
-        wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
-        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+                                        nbv->nbat, wcycle);
     }
 
     if (bUseGPU)
@@ -1324,16 +1312,10 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
         }
         else
         {
-            wallcycle_start(wcycle, ewcMOVEX);
-            dd_move_x(cr->dd, box, as_rvec_array(x.data()));
-            wallcycle_stop(wcycle, ewcMOVEX);
+            dd_move_x(cr->dd, box, as_rvec_array(x.data()), wcycle);
 
-            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-            wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
             nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE, as_rvec_array(x.data()),
-                                            nbv->nbat);
-            wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
-            wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+                                            nbv->nbat, wcycle);
         }
 
         if (bUseGPU)
@@ -1509,11 +1491,9 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
          * communication with calculation with domain decomposition.
          */
         wallcycle_stop(wcycle, ewcFORCE);
-        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
-        nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->nbat, f);
-        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+
+        nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->nbat, f, wcycle);
+
         wallcycle_start_nocount(wcycle, ewcFORCE);
 
         /* if there are multiple fshift output buffers reduce them */
@@ -1568,16 +1548,13 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
                              step, nrnb, wcycle);
                 wallcycle_stop(wcycle, ewcFORCE);
             }
-            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-            wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
+
             /* skip the reduction if there was no non-local work to do */
             if (nbv->grp[eintNonlocal].nbl_lists.nbl[0]->nsci > 0)
             {
                 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal,
-                                               nbv->nbat, f);
+                                               nbv->nbat, f, wcycle);
             }
-            wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-            wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
         }
     }
 
@@ -1594,9 +1571,7 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
         }
         if (bDoForces)
         {
-            wallcycle_start(wcycle, ewcMOVEF);
-            dd_move_f(cr->dd, f, fr->fshift);
-            wallcycle_stop(wcycle, ewcMOVEF);
+            dd_move_f(cr->dd, f, fr->fshift, wcycle);
         }
     }
 
@@ -1684,12 +1659,8 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
      * on the non-alternating path. */
     if (bUseOrEmulGPU && !alternateGpuWait)
     {
-        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
         nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
-                                       nbv->nbat, f);
-        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+                                       nbv->nbat, f, wcycle);
     }
 
     if (DOMAINDECOMP(cr))
@@ -1704,10 +1675,8 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
          */
         if (vsite && !(fr->haveDirectVirialContributions && !(flags & GMX_FORCE_VIRIAL)))
         {
-            wallcycle_start(wcycle, ewcVSITESPREAD);
             spread_vsite_f(vsite, as_rvec_array(x.data()), f, fr->fshift, FALSE, nullptr, nrnb,
-                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
-            wallcycle_stop(wcycle, ewcVSITESPREAD);
+                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
         }
 
         if (flags & GMX_FORCE_VIRIAL)
@@ -1851,21 +1820,18 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
          * and domain decomposition does not use the graph,
          * we do not need to worry about shifting.
          */
-        wallcycle_start(wcycle, ewcPP_PMESENDX);
         gmx_pme_send_coordinates(cr, box, as_rvec_array(x.data()),
                                  lambda[efptCOUL], lambda[efptVDW],
                                  (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
-                                 step);
-        wallcycle_stop(wcycle, ewcPP_PMESENDX);
+                                 step, wcycle);
     }
 #endif /* GMX_MPI */
 
     /* Communicate coordinates and sum dipole if necessary */
     if (DOMAINDECOMP(cr))
     {
-        wallcycle_start(wcycle, ewcMOVEX);
-        dd_move_x(cr->dd, box, as_rvec_array(x.data()));
-        wallcycle_stop(wcycle, ewcMOVEX);
+        dd_move_x(cr->dd, box, as_rvec_array(x.data()), wcycle);
+
         /* No GPU support, no move_x overlap, so reopen the balance region here */
         if (ddOpenBalanceRegion == DdOpenBalanceRegionBeforeForceComputation::yes)
         {
@@ -2010,8 +1976,7 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
         /* Communicate the forces */
         if (DOMAINDECOMP(cr))
         {
-            wallcycle_start(wcycle, ewcMOVEF);
-            dd_move_f(cr->dd, f, fr->fshift);
+            dd_move_f(cr->dd, f, fr->fshift, wcycle);
             /* Do we need to communicate the separate force array
              * for terms that do not contribute to the single sum virial?
              * Position restraints and electric fields do not introduce
@@ -2022,9 +1987,8 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
             if (EEL_FULL(fr->ic->eeltype) && cr->dd->n_intercg_excl &&
                 (flags & GMX_FORCE_VIRIAL))
             {
-                dd_move_f(cr->dd, as_rvec_array(forceWithVirial.force_.data()), nullptr);
+                dd_move_f(cr->dd, as_rvec_array(forceWithVirial.force_.data()), nullptr, wcycle);
             }
-            wallcycle_stop(wcycle, ewcMOVEF);
         }
 
         /* If we have NoVirSum forces, but we do not calculate the virial,
@@ -2032,10 +1996,8 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
          */
         if (vsite && !(fr->haveDirectVirialContributions && !(flags & GMX_FORCE_VIRIAL)))
         {
-            wallcycle_start(wcycle, ewcVSITESPREAD);
             spread_vsite_f(vsite, as_rvec_array(x.data()), f, fr->fshift, FALSE, nullptr, nrnb,
-                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
-            wallcycle_stop(wcycle, ewcVSITESPREAD);
+                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
         }
 
         if (flags & GMX_FORCE_VIRIAL)
index fc6b913bc061faf36b376b356859845b5dfe306f..38fd2e8ef6535d00df0542a7047b70ed7806b2dd 100644 (file)
@@ -55,6 +55,7 @@
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/pbcutil/mshift.h"
 #include "gromacs/pbcutil/pbc.h"
+#include "gromacs/timing/wallcycle.h"
 #include "gromacs/topology/ifunc.h"
 #include "gromacs/topology/mtop_util.h"
 #include "gromacs/topology/topology.h"
@@ -64,7 +65,6 @@
 #include "gromacs/utility/gmxomp.h"
 #include "gromacs/utility/smalloc.h"
 
-
 /* The strategy used here for assigning virtual sites to (thread-)tasks
  * is as follows:
  *
@@ -1617,8 +1617,9 @@ void spread_vsite_f(const gmx_vsite_t *vsite,
                     gmx_bool VirCorr, matrix vir,
                     t_nrnb *nrnb, const t_idef *idef,
                     int ePBC, gmx_bool bMolPBC, const t_graph *g, const matrix box,
-                    t_commrec *cr)
+                    t_commrec *cr, gmx_wallcycle *wcycle)
 {
+    wallcycle_start(wcycle, ewcVSITESPREAD);
     const bool useDomdec = vsite->useDomdec;
     GMX_ASSERT(!useDomdec || (cr != nullptr && DOMAINDECOMP(cr)), "When vsites are set up with domain decomposition, we need a valid commrec");
 
@@ -1819,6 +1820,8 @@ void spread_vsite_f(const gmx_vsite_t *vsite,
     inc_nrnb(nrnb, eNR_VSITE4FD, vsite_count(idef->il, F_VSITE4FD));
     inc_nrnb(nrnb, eNR_VSITE4FDN, vsite_count(idef->il, F_VSITE4FDN));
     inc_nrnb(nrnb, eNR_VSITEN,   vsite_count(idef->il, F_VSITEN));
+
+    wallcycle_stop(wcycle, ewcVSITESPREAD);
 }
 
 /*! \brief Returns the an array with charge-group indices for each atom
index b45930848980d982ae1d16e396690ab8ae68f8db..ae84408b81d59602615f1069ebe2b22c84fc6d87 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,7 @@ struct t_graph;
 struct t_ilist;
 struct t_mdatoms;
 struct t_nrnb;
+struct gmx_wallcycle;
 
 typedef struct gmx_vsite_t {
     gmx_bool             bHaveChargeGroups;    /* Do we have charge groups?               */
@@ -102,7 +103,7 @@ void spread_vsite_f(const gmx_vsite_t *vsite,
                     gmx_bool VirCorr, matrix vir,
                     t_nrnb *nrnb, const t_idef *idef,
                     int ePBC, gmx_bool bMolPBC, const t_graph *g, const matrix box,
-                    t_commrec *cr);
+                    t_commrec *cr, gmx_wallcycle *wcycle);
 /* Spread the force operating on the vsite atoms on the surrounding atoms.
  * If fshift!=NULL also update the shift forces.
  * If VirCorr=TRUE add the virial correction for non-linear vsite constructs
index 86f5a7d8bc7b8cee653955ce5141a623763fa737..914eb6284da9ed5b21f4b28ecb1fe67a1192379b 100644 (file)
@@ -75,7 +75,7 @@ typedef struct
     gmx_cycles_t start;
 } wallcc_t;
 
-typedef struct gmx_wallcycle
+struct gmx_wallcycle
 {
     wallcc_t        *wcc;
     /* did we detect one or more invalid cycle counts */
index 6896f87a95ac8f270860ec6b562c2677999c0d90..33291cdf4df252226d6792020abb172294fdf475 100644 (file)
@@ -46,6 +46,7 @@
 
 typedef struct gmx_wallcycle *gmx_wallcycle_t;
 struct t_commrec;
+const gmx_wallcycle_t nullWallcycle = nullptr;
 
 enum {
     ewcRUN, ewcSTEP, ewcPPDURINGPME, ewcDOMDEC, ewcDDCOMMLOAD,