Clean up wallcycle calls in do_force_cutsVERLET
authorPrashanth Kanduri <kanduri@cscs.ch>
Mon, 18 Dec 2017 09:05:04 +0000 (10:05 +0100)
committerPrashanth Kanduri <kanduri@cscs.ch>
Wed, 7 Feb 2018 14:17:37 +0000 (15:17 +0100)
Change-Id: I50bead1f0760e2f61f5002d33bba8fc6e85692b5

14 files changed:
src/gromacs/domdec/domdec.cpp
src/gromacs/domdec/domdec.h
src/gromacs/ewald/pme-gpu.cpp
src/gromacs/ewald/pme-only.cpp
src/gromacs/ewald/pme-pp.cpp
src/gromacs/ewald/pme.cpp
src/gromacs/ewald/pme.h
src/gromacs/mdlib/nbnxn_atomdata.cpp
src/gromacs/mdlib/nbnxn_atomdata.h
src/gromacs/mdlib/sim_util.cpp
src/gromacs/mdlib/vsite.cpp
src/gromacs/mdlib/vsite.h
src/gromacs/timing/wallcycle.cpp
src/gromacs/timing/wallcycle.h

index bb90e31364ccca0303f19e7ca61bc8f4339d5eb2..f3c8eb9d0d9069f368e3f56888b1897b1695e526 100644 (file)
@@ -390,8 +390,10 @@ void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
     *at_end   = dd->comm->nat[ddnatCON];
 }
 
-void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
+void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[], gmx_wallcycle *wcycle)
 {
+    wallcycle_start(wcycle, ewcMOVEX);
+
     int                    nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
     int                   *index, *cgindex;
     gmx_domdec_comm_t     *comm;
@@ -498,10 +500,14 @@ void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
         }
         nzone += nzone;
     }
+
+    wallcycle_stop(wcycle, ewcMOVEX);
 }
 
-void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
+void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift, gmx_wallcycle *wcycle)
 {
+    wallcycle_start(wcycle, ewcMOVEF);
+
     int                    nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
     int                   *index, *cgindex;
     gmx_domdec_comm_t     *comm;
@@ -619,6 +625,7 @@ void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
         }
         nzone /= 2;
     }
+    wallcycle_stop(wcycle, ewcMOVEF);
 }
 
 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
@@ -9195,7 +9202,7 @@ void dd_partition_system(FILE                *fplog,
                          gmx_vsite_t         *vsite,
                          gmx_constr_t         constr,
                          t_nrnb              *nrnb,
-                         gmx_wallcycle_t      wcycle,
+                         gmx_wallcycle       *wcycle,
                          gmx_bool             bVerbose)
 {
     gmx_domdec_t      *dd;
@@ -9808,7 +9815,7 @@ void dd_partition_system(FILE                *fplog,
 
     if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
     {
-        dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()));
+        dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()), nullWallcycle);
         write_dd_pdb("dd_dump", step, "dump", top_global, cr,
                      -1, as_rvec_array(state_local->x.data()), state_local->box);
     }
index 81f836ab3439d274615e891b168714a194040f9b..d058c2f78a366f02b8a5cf60f0425e5cadaac046 100644 (file)
@@ -62,7 +62,6 @@
 
 #include "gromacs/math/paddedvector.h"
 #include "gromacs/math/vectypes.h"
-#include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/real.h"
@@ -82,6 +81,7 @@ struct t_forcerec;
 struct t_inputrec;
 struct t_mdatoms;
 struct t_nrnb;
+struct gmx_wallcycle;
 class t_state;
 
 namespace gmx
@@ -288,14 +288,14 @@ void dd_force_flop_stop(struct gmx_domdec_t *dd, t_nrnb *nrnb);
 float dd_pme_f_ratio(struct gmx_domdec_t *dd);
 
 /*! \brief Communicate the coordinates to the neighboring cells and do pbc. */
-void dd_move_x(struct gmx_domdec_t *dd, matrix box, rvec x[]);
+void dd_move_x(struct gmx_domdec_t *dd, matrix box, rvec x[], gmx_wallcycle *wcycle);
 
 /*! \brief Sum the forces over the neighboring cells.
  *
  * When fshift!=NULL the shift forces are updated to obtain
  * the correct virial from the single sum including f.
  */
-void dd_move_f(struct gmx_domdec_t *dd, rvec f[], rvec *fshift);
+void dd_move_f(struct gmx_domdec_t *dd, rvec f[], rvec *fshift, gmx_wallcycle *wcycle);
 
 /*! \brief Communicate a real for each atom to the neighboring cells. */
 void dd_atom_spread_real(struct gmx_domdec_t *dd, real v[]);
@@ -326,7 +326,7 @@ void dd_partition_system(FILE                *fplog,
                          gmx_vsite_t         *vsite,
                          struct gmx_constr   *constr,
                          t_nrnb              *nrnb,
-                         gmx_wallcycle_t      wcycle,
+                         gmx_wallcycle       *wcycle,
                          gmx_bool             bVerbose);
 
 /*! \brief Reset all the statistics and counters for total run counting */
index ab13d44db58e6168f2f955ff0361e3e3f00f8e90..ba53868eb3cdfbe71888d0d4b6de7d2c8e1ef8ac 100644 (file)
@@ -170,7 +170,7 @@ void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t              *pme,
 void pme_gpu_prepare_computation(gmx_pme_t            *pme,
                                  bool                  needToUpdateBox,
                                  const matrix          box,
-                                 gmx_wallcycle_t       wcycle,
+                                 gmx_wallcycle        *wcycle,
                                  int                   flags)
 {
     GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
@@ -213,7 +213,7 @@ void pme_gpu_prepare_computation(gmx_pme_t            *pme,
 
 void pme_gpu_launch_spread(gmx_pme_t            *pme,
                            const rvec           *x,
-                           gmx_wallcycle_t       wcycle)
+                           gmx_wallcycle        *wcycle)
 {
     GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
 
@@ -242,7 +242,7 @@ void pme_gpu_launch_spread(gmx_pme_t            *pme,
 }
 
 void pme_gpu_launch_complex_transforms(gmx_pme_t      *pme,
-                                       gmx_wallcycle_t wcycle)
+                                       gmx_wallcycle  *wcycle)
 {
     PmeGpu            *pmeGpu                 = pme->gpu;
     const bool         computeEnergyAndVirial = pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR;
@@ -298,7 +298,7 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t      *pme,
 }
 
 void pme_gpu_launch_gather(const gmx_pme_t                 *pme,
-                           gmx_wallcycle_t gmx_unused       wcycle,
+                           gmx_wallcycle gmx_unused        *wcycle,
                            PmeForceOutputHandling           forceTreatment)
 {
     GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
@@ -346,7 +346,7 @@ static void pme_gpu_get_staged_results(const gmx_pme_t                *pme,
 }
 
 bool pme_gpu_try_finish_task(const gmx_pme_t                *pme,
-                             gmx_wallcycle_t                 wcycle,
+                             gmx_wallcycle                  *wcycle,
                              gmx::ArrayRef<const gmx::RVec> *forces,
                              matrix                          virial,
                              real                           *energy,
@@ -391,7 +391,7 @@ bool pme_gpu_try_finish_task(const gmx_pme_t                *pme,
 }
 
 void pme_gpu_wait_finish_task(const gmx_pme_t                *pme,
-                              gmx_wallcycle_t                 wcycle,
+                              gmx_wallcycle                  *wcycle,
                               gmx::ArrayRef<const gmx::RVec> *forces,
                               matrix                          virial,
                               real                           *energy)
index 509744d934654e85f8ea806adf792cb27e9a012b..34d81fafe900dcae70cc4e5457fd7f61d107c4f9 100644 (file)
@@ -534,7 +534,7 @@ static void gmx_pme_send_force_vir_ener(gmx_pme_pp *pme_pp,
 
 int gmx_pmeonly(struct gmx_pme_t *pme,
                 t_commrec *cr,    t_nrnb *mynrnb,
-                gmx_wallcycle_t wcycle,
+                gmx_wallcycle  *wcycle,
                 gmx_walltime_accounting_t walltime_accounting,
                 t_inputrec *ir, PmeRunMode runMode)
 {
index 837cc1d1afa5b384f0f0392545b1252c929cf7a3..67ee1d773d4af02a39931c3e14e0c669e433b16e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -61,6 +61,7 @@
 #include "gromacs/mdtypes/forceoutput.h"
 #include "gromacs/mdtypes/interaction_const.h"
 #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxmpi.h"
 #include "gromacs/utility/smalloc.h"
@@ -246,8 +247,10 @@ void gmx_pme_send_parameters(t_commrec *cr,
 void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
                               real lambda_q, real lambda_lj,
                               gmx_bool bEnerVir,
-                              gmx_int64_t step)
+                              gmx_int64_t step, gmx_wallcycle *wcycle)
 {
+    wallcycle_start(wcycle, ewcPP_PMESENDX);
+
     unsigned int flags = PP_PME_COORD;
     if (bEnerVir)
     {
@@ -255,6 +258,8 @@ void gmx_pme_send_coordinates(t_commrec *cr, matrix box, rvec *x,
     }
     gmx_pme_send_coeffs_coords(cr, flags, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
                                box, x, lambda_q, lambda_lj, 0, 0, step);
+
+    wallcycle_stop(wcycle, ewcPP_PMESENDX);
 }
 
 void gmx_pme_send_finish(t_commrec *cr)
index fe2f007a0567634ecf0e56f54c03713a390e7c49..0f64912941405b96f8109b44d4f9171b301b22df 100644 (file)
@@ -965,7 +965,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                real sigmaA[],   real sigmaB[],
                matrix box,      t_commrec *cr,
                int  maxshift_x, int maxshift_y,
-               t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
+               t_nrnb *nrnb,    gmx_wallcycle *wcycle,
                matrix vir_q,    matrix vir_lj,
                real *energy_q,  real *energy_lj,
                real lambda_q,   real lambda_lj,
index d8d8838ddb0bc3cfa3a12422ad3796aef3d35b16..4b9972cfbc6cec44582b7bb359ef05f97fcaa9af 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,7 +51,6 @@
 #include <string>
 
 #include "gromacs/math/vectypes.h"
-#include "gromacs/timing/wallcycle.h"
 #include "gromacs/timing/walltime_accounting.h"
 #include "gromacs/utility/arrayref.h"
 #include "gromacs/utility/basedefinitions.h"
@@ -65,6 +64,7 @@ struct PmeGpu;
 struct gmx_wallclock_gpu_pme_t;
 struct gmx_device_info_t;
 struct gmx_pme_t;
+struct gmx_wallcycle;
 
 enum class GpuTaskCompletion;
 
@@ -167,7 +167,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
                real sigmaA[],   real sigmaB[],
                matrix box,      t_commrec *cr,
                int  maxshift_x, int maxshift_y,
-               t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
+               t_nrnb *nrnb,    gmx_wallcycle *wcycle,
                matrix vir_q,    matrix vir_lj,
                real *energy_q,  real *energy_lj,
                real lambda_q,   real lambda_lj,
@@ -177,7 +177,7 @@ int gmx_pme_do(struct gmx_pme_t *pme,
 /*! \brief Called on the nodes that do PME exclusively (as slaves) */
 int gmx_pmeonly(struct gmx_pme_t *pme,
                 struct t_commrec *cr,     t_nrnb *mynrnb,
-                gmx_wallcycle_t wcycle,
+                gmx_wallcycle  *wcycle,
                 gmx_walltime_accounting_t walltime_accounting,
                 t_inputrec *ir, PmeRunMode runMode);
 
@@ -204,7 +204,7 @@ void gmx_pme_send_parameters(struct t_commrec *cr,
 void gmx_pme_send_coordinates(struct t_commrec *cr, matrix box, rvec *x,
                               real lambda_q, real lambda_lj,
                               gmx_bool bEnerVir,
-                              gmx_int64_t step);
+                              gmx_int64_t step, gmx_wallcycle *wcycle);
 
 /*! \brief Tell our PME-only node to finish */
 void gmx_pme_send_finish(struct t_commrec *cr);
@@ -296,7 +296,7 @@ void pme_gpu_get_timings(const gmx_pme_t         *pme,
 void pme_gpu_prepare_computation(gmx_pme_t      *pme,
                                  bool            needToUpdateBox,
                                  const matrix    box,
-                                 gmx_wallcycle_t wcycle,
+                                 gmx_wallcycle  *wcycle,
                                  int             flags);
 
 /*! \brief
@@ -308,7 +308,7 @@ void pme_gpu_prepare_computation(gmx_pme_t      *pme,
  */
 void pme_gpu_launch_spread(gmx_pme_t      *pme,
                            const rvec     *x,
-                           gmx_wallcycle_t wcycle);
+                           gmx_wallcycle  *wcycle);
 
 /*! \brief
  * Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.
@@ -317,7 +317,7 @@ void pme_gpu_launch_spread(gmx_pme_t      *pme,
  * \param[in] wcycle            The wallclock counter.
  */
 void pme_gpu_launch_complex_transforms(gmx_pme_t       *pme,
-                                       gmx_wallcycle_t  wcycle);
+                                       gmx_wallcycle   *wcycle);
 
 /*! \brief
  * Launches last stage of PME on GPU - force gathering and D2H force transfer.
@@ -329,7 +329,7 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t       *pme,
  *                               and accumulates. The reduction is non-atomic.
  */
 void pme_gpu_launch_gather(const gmx_pme_t        *pme,
-                           gmx_wallcycle_t         wcycle,
+                           gmx_wallcycle          *wcycle,
                            PmeForceOutputHandling  forceTreatment);
 
 /*! \brief
@@ -343,7 +343,7 @@ void pme_gpu_launch_gather(const gmx_pme_t        *pme,
  * \param[out] energy         The output energy.
  */
 void pme_gpu_wait_finish_task(const gmx_pme_t                *pme,
-                              gmx_wallcycle_t                 wcycle,
+                              gmx_wallcycle                  *wcycle,
                               gmx::ArrayRef<const gmx::RVec> *forces,
                               matrix                          virial,
                               real                           *energy);
@@ -369,7 +369,7 @@ void pme_gpu_wait_finish_task(const gmx_pme_t                *pme,
  * \returns                   True if the PME GPU tasks have completed
  */
 bool pme_gpu_try_finish_task(const gmx_pme_t                *pme,
-                             gmx_wallcycle_t                 wcycle,
+                             gmx_wallcycle                  *wcycle,
                              gmx::ArrayRef<const gmx::RVec> *forces,
                              matrix                          virial,
                              real                           *energy,
index 5c45f4b496448d67f6dce94c53c2dd1f76df32a2..1d37c91145bf51f75d057adac2e86ccf7bb508ec 100644 (file)
@@ -60,6 +60,7 @@
 #include "gromacs/mdtypes/mdatom.h"
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/simd/simd.h"
+#include "gromacs/timing/wallcycle.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxomp.h"
@@ -1059,8 +1060,12 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs,
                                      int                  locality,
                                      gmx_bool             FillLocal,
                                      rvec                *x,
-                                     nbnxn_atomdata_t    *nbat)
+                                     nbnxn_atomdata_t    *nbat,
+                                     gmx_wallcycle       *wcycle)
 {
+    wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
+
     int g0 = 0, g1 = 0;
     int nth, th;
 
@@ -1129,6 +1134,9 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs,
         }
         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
     }
+
+    wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
+    wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 }
 
 static void
@@ -1516,8 +1524,12 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nba
 void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t    nbs,
                                     int                     locality,
                                     const nbnxn_atomdata_t *nbat,
-                                    rvec                   *f)
+                                    rvec                   *f,
+                                    gmx_wallcycle          *wcycle)
 {
+    wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+    wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
+
     int a0 = 0, na = 0;
 
     nbs_cycle_start(&nbs->cc[enbsCCreducef]);
@@ -1575,6 +1587,9 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t    nbs,
     }
 
     nbs_cycle_stop(&nbs->cc[enbsCCreducef]);
+
+    wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
+    wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 }
 
 /* Adds the shift forces from nbnxn_atomdata_t to fshift */
index d1d594e9e5f15915ebe405d6a9b8b860a0e4e288..d6c7eaf017b9e904afb72917589b8b81fd7719c9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -49,6 +49,7 @@ class MDLogger;
 }
 
 struct t_mdatoms;
+struct gmx_wallcycle;
 
 /* Default nbnxn allocation routine, allocates 32 byte aligned,
  * which works for plain C and aligned SSE and AVX loads/stores.
@@ -112,13 +113,15 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs,
                                      int                  locality,
                                      gmx_bool             FillLocal,
                                      rvec                *x,
-                                     nbnxn_atomdata_t    *nbat);
+                                     nbnxn_atomdata_t    *nbat,
+                                     gmx_wallcycle       *wcycle);
 
 /* Add the forces stored in nbat to f, zeros the forces in nbat */
 void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t    nbs,
                                     int                     locality,
                                     const nbnxn_atomdata_t *nbat,
-                                    rvec                   *f);
+                                    rvec                   *f,
+                                    gmx_wallcycle          *wcycle);
 
 /* Add the fshift force stored in nbat to fshift */
 void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
index 5038cb6fc2a29b0e13cbbb68a994729c1ffda29e..483f10e0637469247fdfd91267c0dc87fa701519 100644 (file)
@@ -384,14 +384,12 @@ static void post_process_forces(t_commrec *cr,
              * This is parallellized. MPI communication is performed
              * if the constructing atoms aren't local.
              */
-            wallcycle_start(wcycle, ewcVSITESPREAD);
             matrix virial = { { 0 } };
             spread_vsite_f(vsite, x, fDirectVir, nullptr,
                            (flags & GMX_FORCE_VIRIAL), virial,
                            nrnb,
-                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
+                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
             forceWithVirial->addVirialContribution(virial);
-            wallcycle_stop(wcycle, ewcVSITESPREAD);
         }
 
         if (flags & GMX_FORCE_VIRIAL)
@@ -991,12 +989,8 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t             *nbv,
                 wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
                 wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
 
-                wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-                wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
                 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
-                                               nbv->nbat, as_rvec_array(force->data()));
-                wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-                wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+                                               nbv->nbat, as_rvec_array(force->data()), wcycle);
             }
         }
     }
@@ -1154,12 +1148,10 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
          * and domain decomposition does not use the graph,
          * we do not need to worry about shifting.
          */
-        wallcycle_start(wcycle, ewcPP_PMESENDX);
         gmx_pme_send_coordinates(cr, box, as_rvec_array(x.data()),
                                  lambda[efptCOUL], lambda[efptVDW],
                                  (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
-                                 step);
-        wallcycle_stop(wcycle, ewcPP_PMESENDX);
+                                 step, wcycle);
     }
 #endif /* GMX_MPI */
 
@@ -1256,12 +1248,8 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
     }
     else
     {
-        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-        wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
         nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE, as_rvec_array(x.data()),
-                                        nbv->nbat);
-        wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
-        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+                                        nbv->nbat, wcycle);
     }
 
     if (bUseGPU)
@@ -1324,16 +1312,10 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
         }
         else
         {
-            wallcycle_start(wcycle, ewcMOVEX);
-            dd_move_x(cr->dd, box, as_rvec_array(x.data()));
-            wallcycle_stop(wcycle, ewcMOVEX);
+            dd_move_x(cr->dd, box, as_rvec_array(x.data()), wcycle);
 
-            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-            wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
             nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE, as_rvec_array(x.data()),
-                                            nbv->nbat);
-            wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
-            wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+                                            nbv->nbat, wcycle);
         }
 
         if (bUseGPU)
@@ -1509,11 +1491,9 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
          * communication with calculation with domain decomposition.
          */
         wallcycle_stop(wcycle, ewcFORCE);
-        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
-        nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->nbat, f);
-        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+
+        nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->nbat, f, wcycle);
+
         wallcycle_start_nocount(wcycle, ewcFORCE);
 
         /* if there are multiple fshift output buffers reduce them */
@@ -1568,16 +1548,13 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
                              step, nrnb, wcycle);
                 wallcycle_stop(wcycle, ewcFORCE);
             }
-            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-            wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
+
             /* skip the reduction if there was no non-local work to do */
             if (nbv->grp[eintNonlocal].nbl_lists.nbl[0]->nsci > 0)
             {
                 nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal,
-                                               nbv->nbat, f);
+                                               nbv->nbat, f, wcycle);
             }
-            wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-            wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
         }
     }
 
@@ -1594,9 +1571,7 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
         }
         if (bDoForces)
         {
-            wallcycle_start(wcycle, ewcMOVEF);
-            dd_move_f(cr->dd, f, fr->fshift);
-            wallcycle_stop(wcycle, ewcMOVEF);
+            dd_move_f(cr->dd, f, fr->fshift, wcycle);
         }
     }
 
@@ -1684,12 +1659,8 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
      * on the non-alternating path. */
     if (bUseOrEmulGPU && !alternateGpuWait)
     {
-        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
-        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
         nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
-                                       nbv->nbat, f);
-        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
-        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+                                       nbv->nbat, f, wcycle);
     }
 
     if (DOMAINDECOMP(cr))
@@ -1704,10 +1675,8 @@ static void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
          */
         if (vsite && !(fr->haveDirectVirialContributions && !(flags & GMX_FORCE_VIRIAL)))
         {
-            wallcycle_start(wcycle, ewcVSITESPREAD);
             spread_vsite_f(vsite, as_rvec_array(x.data()), f, fr->fshift, FALSE, nullptr, nrnb,
-                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
-            wallcycle_stop(wcycle, ewcVSITESPREAD);
+                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
         }
 
         if (flags & GMX_FORCE_VIRIAL)
@@ -1851,21 +1820,18 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
          * and domain decomposition does not use the graph,
          * we do not need to worry about shifting.
          */
-        wallcycle_start(wcycle, ewcPP_PMESENDX);
         gmx_pme_send_coordinates(cr, box, as_rvec_array(x.data()),
                                  lambda[efptCOUL], lambda[efptVDW],
                                  (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
-                                 step);
-        wallcycle_stop(wcycle, ewcPP_PMESENDX);
+                                 step, wcycle);
     }
 #endif /* GMX_MPI */
 
     /* Communicate coordinates and sum dipole if necessary */
     if (DOMAINDECOMP(cr))
     {
-        wallcycle_start(wcycle, ewcMOVEX);
-        dd_move_x(cr->dd, box, as_rvec_array(x.data()));
-        wallcycle_stop(wcycle, ewcMOVEX);
+        dd_move_x(cr->dd, box, as_rvec_array(x.data()), wcycle);
+
         /* No GPU support, no move_x overlap, so reopen the balance region here */
         if (ddOpenBalanceRegion == DdOpenBalanceRegionBeforeForceComputation::yes)
         {
@@ -2010,8 +1976,7 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
         /* Communicate the forces */
         if (DOMAINDECOMP(cr))
         {
-            wallcycle_start(wcycle, ewcMOVEF);
-            dd_move_f(cr->dd, f, fr->fshift);
+            dd_move_f(cr->dd, f, fr->fshift, wcycle);
             /* Do we need to communicate the separate force array
              * for terms that do not contribute to the single sum virial?
              * Position restraints and electric fields do not introduce
@@ -2022,9 +1987,8 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
             if (EEL_FULL(fr->ic->eeltype) && cr->dd->n_intercg_excl &&
                 (flags & GMX_FORCE_VIRIAL))
             {
-                dd_move_f(cr->dd, as_rvec_array(forceWithVirial.force_.data()), nullptr);
+                dd_move_f(cr->dd, as_rvec_array(forceWithVirial.force_.data()), nullptr, wcycle);
             }
-            wallcycle_stop(wcycle, ewcMOVEF);
         }
 
         /* If we have NoVirSum forces, but we do not calculate the virial,
@@ -2032,10 +1996,8 @@ static void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
          */
         if (vsite && !(fr->haveDirectVirialContributions && !(flags & GMX_FORCE_VIRIAL)))
         {
-            wallcycle_start(wcycle, ewcVSITESPREAD);
             spread_vsite_f(vsite, as_rvec_array(x.data()), f, fr->fshift, FALSE, nullptr, nrnb,
-                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
-            wallcycle_stop(wcycle, ewcVSITESPREAD);
+                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
         }
 
         if (flags & GMX_FORCE_VIRIAL)
index fc6b913bc061faf36b376b356859845b5dfe306f..38fd2e8ef6535d00df0542a7047b70ed7806b2dd 100644 (file)
@@ -55,6 +55,7 @@
 #include "gromacs/pbcutil/ishift.h"
 #include "gromacs/pbcutil/mshift.h"
 #include "gromacs/pbcutil/pbc.h"
+#include "gromacs/timing/wallcycle.h"
 #include "gromacs/topology/ifunc.h"
 #include "gromacs/topology/mtop_util.h"
 #include "gromacs/topology/topology.h"
@@ -64,7 +65,6 @@
 #include "gromacs/utility/gmxomp.h"
 #include "gromacs/utility/smalloc.h"
 
-
 /* The strategy used here for assigning virtual sites to (thread-)tasks
  * is as follows:
  *
@@ -1617,8 +1617,9 @@ void spread_vsite_f(const gmx_vsite_t *vsite,
                     gmx_bool VirCorr, matrix vir,
                     t_nrnb *nrnb, const t_idef *idef,
                     int ePBC, gmx_bool bMolPBC, const t_graph *g, const matrix box,
-                    t_commrec *cr)
+                    t_commrec *cr, gmx_wallcycle *wcycle)
 {
+    wallcycle_start(wcycle, ewcVSITESPREAD);
     const bool useDomdec = vsite->useDomdec;
     GMX_ASSERT(!useDomdec || (cr != nullptr && DOMAINDECOMP(cr)), "When vsites are set up with domain decomposition, we need a valid commrec");
 
@@ -1819,6 +1820,8 @@ void spread_vsite_f(const gmx_vsite_t *vsite,
     inc_nrnb(nrnb, eNR_VSITE4FD, vsite_count(idef->il, F_VSITE4FD));
     inc_nrnb(nrnb, eNR_VSITE4FDN, vsite_count(idef->il, F_VSITE4FDN));
     inc_nrnb(nrnb, eNR_VSITEN,   vsite_count(idef->il, F_VSITEN));
+
+    wallcycle_stop(wcycle, ewcVSITESPREAD);
 }
 
 /*! \brief Returns the an array with charge-group indices for each atom
index b45930848980d982ae1d16e396690ab8ae68f8db..ae84408b81d59602615f1069ebe2b22c84fc6d87 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -51,6 +51,7 @@ struct t_graph;
 struct t_ilist;
 struct t_mdatoms;
 struct t_nrnb;
+struct gmx_wallcycle;
 
 typedef struct gmx_vsite_t {
     gmx_bool             bHaveChargeGroups;    /* Do we have charge groups?               */
@@ -102,7 +103,7 @@ void spread_vsite_f(const gmx_vsite_t *vsite,
                     gmx_bool VirCorr, matrix vir,
                     t_nrnb *nrnb, const t_idef *idef,
                     int ePBC, gmx_bool bMolPBC, const t_graph *g, const matrix box,
-                    t_commrec *cr);
+                    t_commrec *cr, gmx_wallcycle *wcycle);
 /* Spread the force operating on the vsite atoms on the surrounding atoms.
  * If fshift!=NULL also update the shift forces.
  * If VirCorr=TRUE add the virial correction for non-linear vsite constructs
index 86f5a7d8bc7b8cee653955ce5141a623763fa737..914eb6284da9ed5b21f4b28ecb1fe67a1192379b 100644 (file)
@@ -75,7 +75,7 @@ typedef struct
     gmx_cycles_t start;
 } wallcc_t;
 
-typedef struct gmx_wallcycle
+struct gmx_wallcycle
 {
     wallcc_t        *wcc;
     /* did we detect one or more invalid cycle counts */
index 6896f87a95ac8f270860ec6b562c2677999c0d90..33291cdf4df252226d6792020abb172294fdf475 100644 (file)
@@ -46,6 +46,7 @@
 
 typedef struct gmx_wallcycle *gmx_wallcycle_t;
 struct t_commrec;
+const gmx_wallcycle_t nullWallcycle = nullptr;
 
 enum {
     ewcRUN, ewcSTEP, ewcPPDURINGPME, ewcDOMDEC, ewcDDCOMMLOAD,