Use GpuTimers in CUDA and OpenCL versions of NBNXM directly

author Artem Zhmurov <zhmurov@gmail.com>

Wed, 24 Feb 2021 13:42:23 +0000 (16:42 +0300)

committer Artem Zhmurov <zhmurov@gmail.com>

Wed, 24 Feb 2021 13:42:23 +0000 (16:42 +0300)
author Artem Zhmurov <zhmurov@gmail.com>
Wed, 24 Feb 2021 13:42:23 +0000 (16:42 +0300)
committer Artem Zhmurov <zhmurov@gmail.com>
Wed, 24 Feb 2021 13:42:23 +0000 (16:42 +0300)
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu

index ccb1fdf26d19f4e6354eeab609a3b756a41109ad..0ddaa17b146e746cd5230692b037e787cc35fd3e 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
@@ -457,7 +457,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
  
      NBAtomData*         adat         = nb->atdat;
      gpu_plist*          plist        = nb->plist[iloc];
  
      NBAtomData*         adat         = nb->atdat;
      gpu_plist*          plist        = nb->plist[iloc];
-    cu_timers_t*        t            = nb->timers;
+    Nbnxm::GpuTimers*   timers       = nb->timers;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
@@ -489,7 +489,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
      /* beginning of timed HtoD section */
      if (bDoTime)
      {
      /* beginning of timed HtoD section */
      if (bDoTime)
      {
-        t->xf[atomLocality].nb_h2d.openTimingRegion(deviceStream);
+        timers->xf[atomLocality].nb_h2d.openTimingRegion(deviceStream);
      }
  
      /* HtoD x, q */
      }
  
      /* HtoD x, q */
@@ -505,7 +505,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
  
      if (bDoTime)
      {
  
      if (bDoTime)
      {
-        t->xf[atomLocality].nb_h2d.closeTimingRegion(deviceStream);
+        timers->xf[atomLocality].nb_h2d.closeTimingRegion(deviceStream);
      }
  
      /* When we get here all misc operations issued in the local stream as well as
      }
  
      /* When we get here all misc operations issued in the local stream as well as
@@ -539,7 +539,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In
      NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
      NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
-    cu_timers_t*        t            = nb->timers;
+    Nbnxm::GpuTimers*   timers       = nb->timers;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
@@ -578,7 +578,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In
      /* beginning of timed nonbonded calculation section */
      if (bDoTime)
      {
      /* beginning of timed nonbonded calculation section */
      if (bDoTime)
      {
-        t->interaction[iloc].nb_k.openTimingRegion(deviceStream);
+        timers->interaction[iloc].nb_k.openTimingRegion(deviceStream);
      }
  
      /* Kernel launch config:
      }
  
      /* Kernel launch config:
@@ -619,7 +619,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In
                  config.sharedMemorySize);
      }
  
                  config.sharedMemorySize);
      }
  
-    auto*      timingEvent = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
+    auto*      timingEvent = bDoTime ? timers->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
      const auto kernel =
              select_nbnxn_kernel(nbp->elecType,
                                  nbp->vdwType,
      const auto kernel =
              select_nbnxn_kernel(nbp->elecType,
                                  nbp->vdwType,
@@ -632,7 +632,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const In
  
      if (bDoTime)
      {
  
      if (bDoTime)
      {
-        t->interaction[iloc].nb_k.closeTimingRegion(deviceStream);
+        timers->interaction[iloc].nb_k.closeTimingRegion(deviceStream);
      }
  
      if (GMX_NATIVE_WINDOWS)
      }
  
      if (GMX_NATIVE_WINDOWS)
@@ -660,7 +660,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c
      NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
      NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
-    cu_timers_t*        t            = nb->timers;
+    Nbnxm::GpuTimers*   timers       = nb->timers;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
@@ -711,7 +711,8 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c
      GpuRegionTimer* timer = nullptr;
      if (bDoTime)
      {
      GpuRegionTimer* timer = nullptr;
      if (bDoTime)
      {
-        timer = &(plist->haveFreshList ? t->interaction[iloc].prune_k : t->interaction[iloc].rollingPrune_k);
+        timer = &(plist->haveFreshList ? timers->interaction[iloc].prune_k
+                                       : timers->interaction[iloc].rollingPrune_k);
      }
  
      /* beginning of timed prune calculation section */
      }
  
      /* beginning of timed prune calculation section */
@@ -800,7 +801,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
  
      /* extract the data */
      NBAtomData*         adat         = nb->atdat;
  
      /* extract the data */
      NBAtomData*         adat         = nb->atdat;
-    cu_timers_t*        t            = nb->timers;
+    Nbnxm::GpuTimers*   timers       = nb->timers;
      bool                bDoTime      = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool                bDoTime      = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
@@ -817,7 +818,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
      /* beginning of timed D2H section */
      if (bDoTime)
      {
      /* beginning of timed D2H section */
      if (bDoTime)
      {
-        t->xf[atomLocality].nb_d2h.openTimingRegion(deviceStream);
+        timers->xf[atomLocality].nb_d2h.openTimingRegion(deviceStream);
      }
  
      /* With DD the local D2H transfer can only start after the non-local
      }
  
      /* With DD the local D2H transfer can only start after the non-local
@@ -884,7 +885,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
  
      if (bDoTime)
      {
  
      if (bDoTime)
      {
-        t->xf[atomLocality].nb_d2h.closeTimingRegion(deviceStream);
+        timers->xf[atomLocality].nb_d2h.closeTimingRegion(deviceStream);
      }
  }
  
      }
  }
  
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu

index 2505422927b705a17a3b6b97f7670d548741110e..f4467c0248a0149f98dc38b33e6e3fdaaa056d9c 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
@@ -194,7 +194,7 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
  
      nb->bUseTwoStreams = bLocalAndNonlocal;
  
  
      nb->bUseTwoStreams = bLocalAndNonlocal;
  
-    nb->timers = new cu_timers_t();
+    nb->timers = new Nbnxm::GpuTimers();
      snew(nb->timings, 1);
  
      /* init nbst */
      snew(nb->timings, 1);
  
      /* init nbst */
@@ -313,7 +313,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
      int                  nalloc, natoms;
      bool                 realloced;
      bool                 bDoTime       = nb->bDoTime;
      int                  nalloc, natoms;
      bool                 realloced;
      bool                 bDoTime       = nb->bDoTime;
-    cu_timers_t*         timers        = nb->timers;
+    Nbnxm::GpuTimers*    timers        = nb->timers;
      NBAtomData*          d_atdat       = nb->atdat;
      const DeviceContext& deviceContext = *nb->deviceContext_;
      const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
      NBAtomData*          d_atdat       = nb->atdat;
      const DeviceContext& deviceContext = *nb->deviceContext_;
      const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h

index 08d96de90f2b507885214c06002c113e9bfe8486..f3d1bb5acc5d1a9a6ed04b55cd63631fbf3e1a36 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
@@ -64,11 +64,6 @@
  /*! \brief cluster size = number of atoms per cluster. */
  static constexpr int c_clSize = c_nbnxnGpuClusterSize;
  
  /*! \brief cluster size = number of atoms per cluster. */
  static constexpr int c_clSize = c_nbnxnGpuClusterSize;
  
-/** \internal
- * \brief Typedef of actual timer type.
- */
-typedef struct Nbnxm::gpu_timers_t cu_timers_t;
-
  /*! \internal
   * \brief Main data structure for CUDA nonbonded force calculations.
   */
  /*! \internal
   * \brief Main data structure for CUDA nonbonded force calculations.
   */
@@ -140,7 +135,7 @@ struct NbnxmGpu
      /*! \brief True if event-based timing is enabled. */
      bool bDoTime = false;
      /*! \brief CUDA event-based timers. */
      /*! \brief True if event-based timing is enabled. */
      bool bDoTime = false;
      /*! \brief CUDA event-based timers. */
-    cu_timers_t* timers = nullptr;
+    Nbnxm::GpuTimers* timers = nullptr;
      /*! \brief Timing data. TODO: deprecate this and query timers for accumulated data instead */
      gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
  };
      /*! \brief Timing data. TODO: deprecate this and query timers for accumulated data instead */
      gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
  };
diff --git a/src/gromacs/nbnxm/gpu_common.h b/src/gromacs/nbnxm/gpu_common.h

index 92002bbf4ba12b59e41aaf4e4825f7ffb4e0dfe6..b2663b33f979075c3de2ca684960607d9cafb741 100644 (file)
--- a/src/gromacs/nbnxm/gpu_common.h
+++ b/src/gromacs/nbnxm/gpu_common.h
@@ -193,19 +193,18 @@ static inline gmx::Range<int> getGpuAtomRange(const NBAtomData* atomData, const
   *   - 1st pass prune: ran during the current step (prior to the force kernel);
   *   - rolling prune:  ran at the end of the previous step (prior to the current step H2D xq);
   *
   *   - 1st pass prune: ran during the current step (prior to the force kernel);
   *   - rolling prune:  ran at the end of the previous step (prior to the current step H2D xq);
   *
- * Note that the resetting of cu_timers_t::didPrune and cu_timers_t::didRollingPrune should happen
- * after calling this function.
+ * Note that the resetting of Nbnxm::GpuTimers::didPrune and Nbnxm::GpuTimers::didRollingPrune
+ * should happen after calling this function.
   *
   * \param[in] timers   structs with GPU timer objects
   * \param[inout] timings  GPU task timing data
   * \param[in] iloc        interaction locality
   */
   *
   * \param[in] timers   structs with GPU timer objects
   * \param[inout] timings  GPU task timing data
   * \param[in] iloc        interaction locality
   */
-template<typename GpuTimers>
-static void countPruneKernelTime(GpuTimers*                 timers,
+static void countPruneKernelTime(Nbnxm::GpuTimers*          timers,
                                   gmx_wallclock_gpu_nbnxn_t* timings,
                                   const InteractionLocality  iloc)
  {
                                   gmx_wallclock_gpu_nbnxn_t* timings,
                                   const InteractionLocality  iloc)
  {
-    gpu_timers_t::Interaction& iTimers = timers->interaction[iloc];
+    GpuTimers::Interaction& iTimers = timers->interaction[iloc];
  
      // We might have not done any pruning (e.g. if we skipped with empty domains).
      if (!iTimers.didPrune && !iTimers.didRollingPrune)
  
      // We might have not done any pruning (e.g. if we skipped with empty domains).
      if (!iTimers.didPrune && !iTimers.didRollingPrune)
@@ -281,7 +280,6 @@ static inline void gpu_reduce_staged_outputs(const NBStagingData&      nbst,
   *      counters could end up being inconsistent due to not being incremented
   *      on some of the node when this is skipped on empty local domains!
   *
   *      counters could end up being inconsistent due to not being incremented
   *      on some of the node when this is skipped on empty local domains!
   *
- * \tparam     GpuTimers         GPU timers type
   * \tparam     GpuPairlist       Pair list type
   * \param[out] timings           Pointer to the NB GPU timings data
   * \param[in]  timers            Pointer to GPU timers data
   * \tparam     GpuPairlist       Pair list type
   * \param[out] timings           Pointer to the NB GPU timings data
   * \param[in]  timers            Pointer to GPU timers data
@@ -291,9 +289,9 @@ static inline void gpu_reduce_staged_outputs(const NBStagingData&      nbst,
   * \param[in]  doTiming          True if timing is enabled.
   *
   */
   * \param[in]  doTiming          True if timing is enabled.
   *
   */
-template<typename GpuTimers, typename GpuPairlist>
+template<typename GpuPairlist>
  static inline void gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t* timings,
  static inline void gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t* timings,
-                                          GpuTimers*                 timers,
+                                          Nbnxm::GpuTimers*          timers,
                                            const GpuPairlist*         plist,
                                            AtomLocality               atomLocality,
                                            const gmx::StepWorkload&   stepWork,
                                            const GpuPairlist*         plist,
                                            AtomLocality               atomLocality,
                                            const gmx::StepWorkload&   stepWork,
diff --git a/src/gromacs/nbnxm/gpu_types_common.h b/src/gromacs/nbnxm/gpu_types_common.h

index 85c5853ebd4de1cbae1ef6e1209c90ca9ee56035..4b7f0e762e8f079dc878eecbbd3315b5ed1e0b0c 100644 (file)
--- a/src/gromacs/nbnxm/gpu_types_common.h
+++ b/src/gromacs/nbnxm/gpu_types_common.h
@@ -205,7 +205,7 @@ using gmx::InteractionLocality;
   * The two-sized arrays hold the local and non-local values and should always
   * be indexed with eintLocal/eintNonlocal.
   */
   * The two-sized arrays hold the local and non-local values and should always
   * be indexed with eintLocal/eintNonlocal.
   */
-struct gpu_timers_t
+struct GpuTimers
  {
      /*! \internal
       * \brief Timers for local or non-local coordinate/force transfers
  {
      /*! \internal
       * \brief Timers for local or non-local coordinate/force transfers
@@ -244,7 +244,7 @@ struct gpu_timers_t
      //! timers for coordinate/force transfers (every step)
      gmx::EnumerationArray<AtomLocality, XFTransfers> xf;
      //! timers for interaction related transfers
      //! timers for coordinate/force transfers (every step)
      gmx::EnumerationArray<AtomLocality, XFTransfers> xf;
      //! timers for interaction related transfers
-    gmx::EnumerationArray<InteractionLocality, Nbnxm::gpu_timers_t::Interaction> interaction;
+    gmx::EnumerationArray<InteractionLocality, Nbnxm::GpuTimers::Interaction> interaction;
  };
  
  /*! \internal
  };
  
  /*! \internal
diff --git a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp

index aad56f49155f63facabd475f6fad7a6ae94c4e3a..51f7745f7f6791346c7847b8431f81f9823cee1c 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp
@@ -258,7 +258,7 @@ void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const Inte
          }
      }
  
          }
      }
  
-    gpu_timers_t::Interaction& iTimers = nb->timers->interaction[iloc];
+    GpuTimers::Interaction& iTimers = nb->timers->interaction[iloc];
  
      if (bDoTime)
      {
  
      if (bDoTime)
      {
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp

index 50e7b9d8d4f167e1a36eef7134a2047e8b4b7bd5..b8107fc857de911e79f6b48714301118a2a1aba5 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
@@ -529,7 +529,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
  
      NBAtomData*         adat         = nb->atdat;
      gpu_plist*          plist        = nb->plist[iloc];
  
      NBAtomData*         adat         = nb->atdat;
      gpu_plist*          plist        = nb->plist[iloc];
-    cl_timers_t*        t            = nb->timers;
+    Nbnxm::GpuTimers*   timers       = nb->timers;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
@@ -561,7 +561,7 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
      /* beginning of timed HtoD section */
      if (bDoTime)
      {
      /* beginning of timed HtoD section */
      if (bDoTime)
      {
-        t->xf[atomLocality].nb_h2d.openTimingRegion(deviceStream);
+        timers->xf[atomLocality].nb_h2d.openTimingRegion(deviceStream);
      }
  
      /* HtoD x, q */
      }
  
      /* HtoD x, q */
@@ -573,11 +573,11 @@ void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const Atom
                         atomsRange.size(),
                         deviceStream,
                         GpuApiCallBehavior::Async,
                         atomsRange.size(),
                         deviceStream,
                         GpuApiCallBehavior::Async,
-                       bDoTime ? t->xf[atomLocality].nb_h2d.fetchNextEvent() : nullptr);
+                       bDoTime ? timers->xf[atomLocality].nb_h2d.fetchNextEvent() : nullptr);
  
      if (bDoTime)
      {
  
      if (bDoTime)
      {
-        t->xf[atomLocality].nb_h2d.closeTimingRegion(deviceStream);
+        timers->xf[atomLocality].nb_h2d.closeTimingRegion(deviceStream);
      }
  
      /* When we get here all misc operations issued in the local stream as well as
      }
  
      /* When we get here all misc operations issued in the local stream as well as
@@ -613,7 +613,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
      NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
      NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
-    cl_timers_t*        t            = nb->timers;
+    Nbnxm::GpuTimers*   timers       = nb->timers;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool bDoTime = nb->bDoTime;
@@ -655,7 +655,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
      /* beginning of timed nonbonded calculation section */
      if (bDoTime)
      {
      /* beginning of timed nonbonded calculation section */
      if (bDoTime)
      {
-        t->interaction[iloc].nb_k.openTimingRegion(deviceStream);
+        timers->interaction[iloc].nb_k.openTimingRegion(deviceStream);
      }
  
      /* kernel launch config */
      }
  
      /* kernel launch config */
@@ -685,7 +685,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
  
      fillin_ocl_structures(nbp, &nbparams_params);
  
  
      fillin_ocl_structures(nbp, &nbparams_params);
  
-    auto*          timingEvent  = bDoTime ? t->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
+    auto* timingEvent = bDoTime ? timers->interaction[iloc].nb_k.fetchNextEvent() : nullptr;
      constexpr char kernelName[] = "k_calc_nb";
      const auto     kernel =
              select_nbnxn_kernel(nb,
      constexpr char kernelName[] = "k_calc_nb";
      const auto     kernel =
              select_nbnxn_kernel(nb,
@@ -745,7 +745,7 @@ void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nb
  
      if (bDoTime)
      {
  
      if (bDoTime)
      {
-        t->interaction[iloc].nb_k.closeTimingRegion(deviceStream);
+        timers->interaction[iloc].nb_k.closeTimingRegion(deviceStream);
      }
  }
  
      }
  }
  
@@ -784,7 +784,7 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c
      NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
      NBAtomData*         adat         = nb->atdat;
      NBParamGpu*         nbp          = nb->nbparam;
      gpu_plist*          plist        = nb->plist[iloc];
-    cl_timers_t*        t            = nb->timers;
+    Nbnxm::GpuTimers*   timers       = nb->timers;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
      bool                bDoTime      = nb->bDoTime;
  
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
      bool                bDoTime      = nb->bDoTime;
  
@@ -834,7 +834,8 @@ void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, c
      GpuRegionTimer* timer = nullptr;
      if (bDoTime)
      {
      GpuRegionTimer* timer = nullptr;
      if (bDoTime)
      {
-        timer = &(plist->haveFreshList ? t->interaction[iloc].prune_k : t->interaction[iloc].rollingPrune_k);
+        timer = &(plist->haveFreshList ? timers->interaction[iloc].prune_k
+                                       : timers->interaction[iloc].rollingPrune_k);
      }
  
      /* beginning of timed prune calculation section */
      }
  
      /* beginning of timed prune calculation section */
@@ -933,7 +934,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
                 "beginning of the copy back function.");
  
      NBAtomData*         adat         = nb->atdat;
                 "beginning of the copy back function.");
  
      NBAtomData*         adat         = nb->atdat;
-    cl_timers_t*        t            = nb->timers;
+    Nbnxm::GpuTimers*   timers       = nb->timers;
      bool                bDoTime      = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
      bool                bDoTime      = nb->bDoTime;
      const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
  
@@ -958,7 +959,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
      /* beginning of timed D2H section */
      if (bDoTime)
      {
      /* beginning of timed D2H section */
      if (bDoTime)
      {
-        t->xf[atomLocality].nb_d2h.openTimingRegion(deviceStream);
+        timers->xf[atomLocality].nb_d2h.openTimingRegion(deviceStream);
      }
  
      /* With DD the local D2H transfer can only start after the non-local
      }
  
      /* With DD the local D2H transfer can only start after the non-local
@@ -978,7 +979,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
                           atomsRange.size(),
                           deviceStream,
                           GpuApiCallBehavior::Async,
                           atomsRange.size(),
                           deviceStream,
                           GpuApiCallBehavior::Async,
-                         bDoTime ? t->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
+                         bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
  
      /* kick off work */
      cl_error = clFlush(deviceStream.stream());
  
      /* kick off work */
      cl_error = clFlush(deviceStream.stream());
@@ -1009,7 +1010,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
                                   SHIFTS,
                                   deviceStream,
                                   GpuApiCallBehavior::Async,
                                   SHIFTS,
                                   deviceStream,
                                   GpuApiCallBehavior::Async,
-                                 bDoTime ? t->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
+                                 bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
          }
  
          /* DtoH energies */
          }
  
          /* DtoH energies */
@@ -1023,7 +1024,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
                                   1,
                                   deviceStream,
                                   GpuApiCallBehavior::Async,
                                   1,
                                   deviceStream,
                                   GpuApiCallBehavior::Async,
-                                 bDoTime ? t->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
+                                 bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
              static_assert(sizeof(*nb->nbst.eElec) == sizeof(float),
                            "Sizes of host- and device-side electrostatic energy terms should be the "
                            "same.");
              static_assert(sizeof(*nb->nbst.eElec) == sizeof(float),
                            "Sizes of host- and device-side electrostatic energy terms should be the "
                            "same.");
@@ -1033,13 +1034,13 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
                                   1,
                                   deviceStream,
                                   GpuApiCallBehavior::Async,
                                   1,
                                   deviceStream,
                                   GpuApiCallBehavior::Async,
-                                 bDoTime ? t->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
+                                 bDoTime ? timers->xf[atomLocality].nb_d2h.fetchNextEvent() : nullptr);
          }
      }
  
      if (bDoTime)
      {
          }
      }
  
      if (bDoTime)
      {
-        t->xf[atomLocality].nb_d2h.closeTimingRegion(deviceStream);
+        timers->xf[atomLocality].nb_d2h.closeTimingRegion(deviceStream);
      }
  }
  
      }
  }
  
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp

index 25cb3158b2b58f26209bd10cafe5794438c8195d..a2ee20416f4684cfd6468e906a6380466c853a7b 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -297,7 +297,7 @@ NbnxmGpu* gpu_init(const gmx::DeviceStreamManager& deviceStreamManager,
  
      nb->bUseTwoStreams = bLocalAndNonlocal;
  
  
      nb->bUseTwoStreams = bLocalAndNonlocal;
  
-    nb->timers = new cl_timers_t();
+    nb->timers = new Nbnxm::GpuTimers();
      snew(nb->timings, 1);
  
      /* set device info, just point it to the right GPU among the detected ones */
      snew(nb->timings, 1);
  
      /* set device info, just point it to the right GPU among the detected ones */
@@ -424,7 +424,7 @@ void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
      int                  nalloc, natoms;
      bool                 realloced;
      bool                 bDoTime       = nb->bDoTime;
      int                  nalloc, natoms;
      bool                 realloced;
      bool                 bDoTime       = nb->bDoTime;
-    cl_timers_t*         timers        = nb->timers;
+    Nbnxm::GpuTimers*    timers        = nb->timers;
      NBAtomData*          d_atdat       = nb->atdat;
      const DeviceContext& deviceContext = *nb->deviceContext_;
      const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
      NBAtomData*          d_atdat       = nb->atdat;
      const DeviceContext& deviceContext = *nb->deviceContext_;
      const DeviceStream&  localStream   = *nb->deviceStreams[InteractionLocality::Local];
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h

index 474d90700d95d3d6aba8600a571f3f235a43884a..c802c4199fd865f6bdbcf8b8aec940035d5187c9 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h
@@ -127,11 +127,6 @@ typedef struct cl_nbparam_params
  } cl_nbparam_params_t;
  
  
  } cl_nbparam_params_t;
  
  
-/** \internal
- * \brief Typedef of actual timer type.
- */
-typedef struct Nbnxm::gpu_timers_t cl_timers_t;
-
  /*! \internal
   * \brief Main data structure for OpenCL nonbonded force calculations.
   */
  /*! \internal
   * \brief Main data structure for OpenCL nonbonded force calculations.
   */
@@ -206,7 +201,7 @@ struct NbnxmGpu
      //! True if event-based timing is enabled.
      bool bDoTime = false;
      //! OpenCL event-based timers.
      //! True if event-based timing is enabled.
      bool bDoTime = false;
      //! OpenCL event-based timers.
-    cl_timers_t* timers = nullptr;
+    Nbnxm::GpuTimers* timers = nullptr;
      //! Timing data. TODO: deprecate this and query timers for accumulated data instead
      gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
  };
      //! Timing data. TODO: deprecate this and query timers for accumulated data instead
      gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
  };
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h b/src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h

index c1e23c1a7420f94bf59801e59a12f75f35c26759..fd1d655e3eff3b46b9a6af0b0078efd5c4c62707 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h
@@ -85,7 +85,7 @@ struct NbnxmGpu
      /*! \brief True if event-based timing is enabled. Always false for SYCL. */
      bool bDoTime = false;
      /*! \brief Dummy timers. */
      /*! \brief True if event-based timing is enabled. Always false for SYCL. */
      bool bDoTime = false;
      /*! \brief Dummy timers. */
-    Nbnxm::gpu_timers_t* timers = nullptr;
+    Nbnxm::GpuTimers* timers = nullptr;
      /*! \brief Dummy timing data. */
      gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
  
      /*! \brief Dummy timing data. */
      gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
author	Artem Zhmurov <zhmurov@gmail.com>
	Wed, 24 Feb 2021 13:42:23 +0000 (16:42 +0300)
committer	Artem Zhmurov <zhmurov@gmail.com>
	Wed, 24 Feb 2021 13:42:23 +0000 (16:42 +0300)
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_common.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_types_common.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_gpu_data_mgmt.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h		patch \| blob \| history
src/gromacs/nbnxm/sycl/nbnxm_sycl_types.h		patch \| blob \| history