Make NbnxmGpu class with constructor

author Mark Abraham <mark.j.abraham@gmail.com>

Sun, 26 Jan 2020 15:33:45 +0000 (16:33 +0100)

committer Mark Abraham <mark.j.abraham@gmail.com>

Mon, 27 Jan 2020 07:39:47 +0000 (08:39 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Sun, 26 Jan 2020 15:33:45 +0000 (16:33 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Mon, 27 Jan 2020 07:39:47 +0000 (08:39 +0100)
diff --git a/docs/doxygen/suppressions.txt b/docs/doxygen/suppressions.txt

index 8f3a2dd4798272afd8dc918ea835c7eee67890c1..964a2c8197a39ea94daf96740f2274caf84756de 100644 (file)
--- a/docs/doxygen/suppressions.txt
+++ b/docs/doxygen/suppressions.txt
@@ -35,7 +35,7 @@ src/gromacs/nbnxm/kernels_simd_2xmm/kernel_common.h: warning: should include "nb
  src/gromacs/nbnxm/kernels_simd_4xm/kernel_common.h: warning: should include "nbnxm_simd.h"
  
  # This seems to be a false positive
-src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h: error: gmx_nbnxm_gpu_t: is in internal file(s), but appears in public documentation
+src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h: error: NbnxmGpu: is in internal file(s), but appears in public documentation
  
  # Temporary while we change the SIMD implementation
  src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace_common.h: warning: should include "simd.h"
diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp

index 8fd60a8f03e6bd7d52d42dc439a53ef278e22496..89d2e762b8f6dca8107daeb64c373333585aec7d 100644 (file)
--- a/src/gromacs/nbnxm/atomdata.cpp
+++ b/src/gromacs/nbnxm/atomdata.cpp
@@ -1074,7 +1074,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet&   gridSet,
  void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet&   gridSet,
                                      const gmx::AtomLocality locality,
                                      bool                    fillLocal,
-                                    gmx_nbnxm_gpu_t*        gpu_nbv,
+                                    NbnxmGpu*               gpu_nbv,
                                      DeviceBuffer<float>     d_x,
                                      GpuEventSynchronizer*   xReadyOnDevice)
  {
@@ -1463,7 +1463,7 @@ void reduceForcesGpu(const gmx::AtomLocality                    locality,
                       const Nbnxm::GridSet&                      gridSet,
                       void*                                      pmeForcesDevice,
                       gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
-                     gmx_nbnxm_gpu_t*                           gpu_nbv,
+                     NbnxmGpu*                                  gpu_nbv,
                       bool                                       useGpuFPmeReduction,
                       bool                                       accumulateForce)
  {
diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h

index d41408d9534cf6a73d8e2222955e145dd08a8a2f..ceb87f71f093f5a99e4d86005f3c770a952cb60c 100644 (file)
--- a/src/gromacs/nbnxm/atomdata.h
+++ b/src/gromacs/nbnxm/atomdata.h
@@ -61,7 +61,7 @@ namespace gmx
  class MDLogger;
  }
  
-struct gmx_nbnxm_gpu_t;
+struct NbnxmGpu;
  struct nbnxn_atomdata_t;
  struct nonbonded_verlet_t;
  struct t_mdatoms;
@@ -379,7 +379,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
  void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
                                      gmx::AtomLocality     locality,
                                      bool                  fillLocal,
-                                    gmx_nbnxm_gpu_t*      gpu_nbv,
+                                    NbnxmGpu*             gpu_nbv,
                                      DeviceBuffer<float>   d_x,
                                      GpuEventSynchronizer* xReadyOnDevice);
  
@@ -408,7 +408,7 @@ void reduceForcesGpu(gmx::AtomLocality                          locality,
                       const Nbnxm::GridSet&                      gridSet,
                       void*                                      pmeForcesDevice,
                       gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
-                     gmx_nbnxm_gpu_t*                           gpu_nbv,
+                     NbnxmGpu*                                  gpu_nbv,
                       bool                                       useGpuFPmeReduction,
                       bool                                       accumulateForce);
  
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu

index f5135d7bfbf6a1a1de7639a54c05ef2eb6609f88..22da9946f1664e8b0bd20d41f2af7ad17ded3dfd 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
@@ -363,7 +363,7 @@ static inline int calc_shmem_required_nonbonded(const int               num_thre
   *  the local, this function records the event if called with the local stream as
   *  argument and inserts in the GPU stream a wait on the event on the nonlocal.
   */
-void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t* nb, const InteractionLocality interactionLocality)
+void nbnxnInsertNonlocalGpuDependency(const NbnxmGpu* nb, const InteractionLocality interactionLocality)
  {
      cudaStream_t stream = nb->stream[interactionLocality];
  
@@ -389,7 +389,7 @@ void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t* nb, const Interacti
  }
  
  /*! \brief Launch asynchronously the xq buffer host to device copy. */
-void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
+void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
  {
      GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
  
@@ -477,7 +477,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, con
     the local x+q H2D (and all preceding) tasks are complete and synchronize
     with this event in the non-local stream before launching the non-bonded kernel.
   */
-void gpu_launch_kernel(gmx_nbnxm_gpu_t* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
+void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
  {
      cu_atomdata_t* adat   = nb->atdat;
      cu_nbparam_t*  nbp    = nb->nbparam;
@@ -589,7 +589,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
      return shmem;
  }
  
-void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc, const int numParts)
+void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
  {
      cu_atomdata_t* adat   = nb->atdat;
      cu_nbparam_t*  nbp    = nb->nbparam;
@@ -713,7 +713,7 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality
      }
  }
  
-void gpu_launch_cpyback(gmx_nbnxm_gpu_t*         nb,
+void gpu_launch_cpyback(NbnxmGpu*                nb,
                          nbnxn_atomdata_t*        nbatom,
                          const gmx::StepWorkload& stepWork,
                          const AtomLocality       atomLocality)
@@ -817,7 +817,7 @@ void cuda_set_cacheconfig()
  /* X buffer operations on GPU: performs conversion from rvec to nb format. */
  void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
                             bool                      setFillerCoords,
-                           gmx_nbnxm_gpu_t*          nb,
+                           NbnxmGpu*                 nb,
                             DeviceBuffer<float>       d_x,
                             GpuEventSynchronizer*     xReadyOnDevice,
                             const Nbnxm::AtomLocality locality,
@@ -885,7 +885,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
   */
  void nbnxn_gpu_add_nbat_f_to_f(const AtomLocality                         atomLocality,
                                 DeviceBuffer<float>                        totalForcesDevice,
-                               gmx_nbnxm_gpu_t*                           nb,
+                               NbnxmGpu*                                  nb,
                                 void*                                      pmeForcesDevice,
                                 gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
                                 int                                        atomStart,
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu

index 2714dfee8d7dec879c6f4b5ed8cc47069a29e859..d99bdfd7749999fb8aa9a8604f435a7eef46bd7d 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
@@ -45,7 +45,7 @@
  #include <stdio.h>
  #include <stdlib.h>
  
-// TODO We would like to move this down, but the way gmx_nbnxm_gpu_t
+// TODO We would like to move this down, but the way NbnxmGpu
  //      is currently declared means this has to be before gpu_types.h
  #include "nbnxm_cuda_types.h"
  
@@ -89,7 +89,7 @@ namespace Nbnxm
  static unsigned int gpu_min_ci_balanced_factor = 44;
  
  /* Fw. decl. */
-static void nbnxn_cuda_clear_e_fshift(gmx_nbnxm_gpu_t* nb);
+static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb);
  
  /* Fw. decl, */
  static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam);
@@ -400,7 +400,7 @@ static void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
  }
  
  /*! Initializes simulation constant data. */
-static void cuda_init_const(gmx_nbnxm_gpu_t*                nb,
+static void cuda_init_const(NbnxmGpu*                       nb,
                              const interaction_const_t*      ic,
                              const PairlistParams&           listParams,
                              const nbnxn_atomdata_t::Params& nbatParams)
@@ -412,17 +412,16 @@ static void cuda_init_const(gmx_nbnxm_gpu_t*                nb,
      nbnxn_cuda_clear_e_fshift(nb);
  }
  
-gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t*   deviceInfo,
-                          const interaction_const_t* ic,
-                          const PairlistParams&      listParams,
-                          const nbnxn_atomdata_t*    nbat,
-                          int /*rank*/,
-                          gmx_bool bLocalAndNonlocal)
+NbnxmGpu* gpu_init(const gmx_device_info_t*   deviceInfo,
+                   const interaction_const_t* ic,
+                   const PairlistParams&      listParams,
+                   const nbnxn_atomdata_t*    nbat,
+                   int /*rank*/,
+                   gmx_bool bLocalAndNonlocal)
  {
      cudaError_t stat;
  
-    gmx_nbnxm_gpu_t* nb;
-    snew(nb, 1);
+    auto nb = new NbnxmGpu;
      snew(nb->atdat, 1);
      snew(nb->nbparam, 1);
      snew(nb->plist[InteractionLocality::Local], 1);
@@ -509,7 +508,7 @@ gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t*   deviceInfo,
      return nb;
  }
  
-void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
+void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
  {
      char         sbuf[STRLEN];
      bool         bDoTime = (nb->bDoTime && !h_plist->sci.empty());
@@ -565,7 +564,7 @@ void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, con
      d_plist->haveFreshList = true;
  }
  
-void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
+void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
  {
      cu_atomdata_t* adat = nb->atdat;
      cudaStream_t   ls   = nb->stream[InteractionLocality::Local];
@@ -579,7 +578,7 @@ void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
  }
  
  /*! Clears the first natoms_clear elements of the GPU nonbonded force output array. */
-static void nbnxn_cuda_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
+static void nbnxn_cuda_clear_f(NbnxmGpu* nb, int natoms_clear)
  {
      cudaError_t    stat;
      cu_atomdata_t* adat = nb->atdat;
@@ -590,7 +589,7 @@ static void nbnxn_cuda_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
  }
  
  /*! Clears nonbonded shift force output array and energy outputs on the GPU. */
-static void nbnxn_cuda_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
+static void nbnxn_cuda_clear_e_fshift(NbnxmGpu* nb)
  {
      cudaError_t    stat;
      cu_atomdata_t* adat = nb->atdat;
@@ -604,7 +603,7 @@ static void nbnxn_cuda_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
      CU_RET_ERR(stat, "cudaMemsetAsync on e_el falied");
  }
  
-void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
+void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
  {
      nbnxn_cuda_clear_f(nb, nb->atdat->natoms);
      /* clear shift force array and energies if the outputs were
@@ -615,7 +614,7 @@ void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
      }
  }
  
-void gpu_init_atomdata(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbat)
+void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
  {
      cudaError_t    stat;
      int            nalloc, natoms;
@@ -702,7 +701,7 @@ static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam)
      }
  }
  
-void gpu_free(gmx_nbnxm_gpu_t* nb)
+void gpu_free(NbnxmGpu* nb)
  {
      cudaError_t    stat;
      cu_atomdata_t* atdat;
@@ -789,7 +788,7 @@ void gpu_free(gmx_nbnxm_gpu_t* nb)
      sfree(atdat);
      sfree(nbparam);
      sfree(nb->timings);
-    sfree(nb);
+    delete nb;
  
      if (debug)
      {
@@ -798,7 +797,7 @@ void gpu_free(gmx_nbnxm_gpu_t* nb)
  }
  
  //! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t* nb)
+gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb)
  {
      return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
  }
@@ -811,38 +810,38 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv)
      }
  }
  
-int gpu_min_ci_balanced(gmx_nbnxm_gpu_t* nb)
+int gpu_min_ci_balanced(NbnxmGpu* nb)
  {
      return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->prop.multiProcessorCount : 0;
  }
  
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
  {
      return ((nb->nbparam->eeltype == eelCuEWALD_ANA) || (nb->nbparam->eeltype == eelCuEWALD_ANA_TWIN));
  }
  
-void* gpu_get_command_stream(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc)
+void* gpu_get_command_stream(NbnxmGpu* nb, const InteractionLocality iloc)
  {
      assert(nb);
  
      return static_cast<void*>(&nb->stream[iloc]);
  }
  
-void* gpu_get_xq(gmx_nbnxm_gpu_t* nb)
+void* gpu_get_xq(NbnxmGpu* nb)
  {
      assert(nb);
  
      return static_cast<void*>(nb->atdat->xq);
  }
  
-void* gpu_get_f(gmx_nbnxm_gpu_t* nb)
+void* gpu_get_f(NbnxmGpu* nb)
  {
      assert(nb);
  
      return static_cast<void*>(nb->atdat->f);
  }
  
-rvec* gpu_get_fshift(gmx_nbnxm_gpu_t* nb)
+rvec* gpu_get_fshift(NbnxmGpu* nb)
  {
      assert(nb);
  
@@ -851,7 +850,7 @@ rvec* gpu_get_fshift(gmx_nbnxm_gpu_t* nb)
  
  /* Initialization for X buffer operations on GPU. */
  /* TODO  Remove explicit pinning from host arrays from here and manage in a more natural way*/
-void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, gmx_nbnxm_gpu_t* gpu_nbv)
+void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, NbnxmGpu* gpu_nbv)
  {
      cudaStream_t stream        = gpu_nbv->stream[InteractionLocality::Local];
      bool         bDoTime       = gpu_nbv->bDoTime;
@@ -937,7 +936,7 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, gmx_nbnxm_gpu_t*
  
  /* Initialization for F buffer operations on GPU. */
  void nbnxn_gpu_init_add_nbat_f_to_f(const int*                  cell,
-                                    gmx_nbnxm_gpu_t*            gpu_nbv,
+                                    NbnxmGpu*                   gpu_nbv,
                                      int                         natoms_total,
                                      GpuEventSynchronizer* const localReductionDone)
  {
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h

index 911536167b53fb489fb4077f23281827ec0d482b..be7d86163915410a85aa2fa2d34a00869f95fa34 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
@@ -128,7 +128,6 @@ enum evdwCu
  /*! \cond */
  typedef struct cu_atomdata cu_atomdata_t;
  typedef struct cu_nbparam  cu_nbparam_t;
-typedef struct nb_staging  nb_staging_t;
  /*! \endcond */
  
  
@@ -138,14 +137,14 @@ typedef struct nb_staging  nb_staging_t;
   *  The energies/shift forces get downloaded here first, before getting added
   *  to the CPU-side aggregate values.
   */
-struct nb_staging
+struct nb_staging_t
  {
      //! LJ energy
-    float* e_lj;
+    float* e_lj = nullptr;
      //! electrostatic energy
-    float* e_el;
+    float* e_el = nullptr;
      //! shift forces
-    float3* fshift;
+    float3* fshift = nullptr;
  };
  
  /** \internal
@@ -267,58 +266,58 @@ class GpuEventSynchronizer;
  /*! \internal
   * \brief Main data structure for CUDA nonbonded force calculations.
   */
-struct gmx_nbnxm_gpu_t
+struct NbnxmGpu
  {
      /*! \brief CUDA device information */
-    const gmx_device_info_t* dev_info;
+    const gmx_device_info_t* dev_info = nullptr;
      /*! \brief true if doing both local/non-local NB work on GPU */
-    bool bUseTwoStreams;
+    bool bUseTwoStreams = false;
      /*! \brief atom data */
-    cu_atomdata_t* atdat;
+    cu_atomdata_t* atdat = nullptr;
      /*! \brief f buf ops cell index mapping */
-    int* cell;
+    int* cell = nullptr;
      /*! \brief number of indices in cell buffer */
-    int ncell;
+    int ncell = 0;
      /*! \brief number of indices allocated in cell buffer */
-    int ncell_alloc;
+    int ncell_alloc = 0;
      /*! \brief array of atom indices */
-    int* atomIndices;
+    int* atomIndices = nullptr;
      /*! \brief size of atom indices */
-    int atomIndicesSize;
+    int atomIndicesSize = 0;
      /*! \brief size of atom indices allocated in device buffer */
-    int atomIndicesSize_alloc;
+    int atomIndicesSize_alloc = 0;
      /*! \brief x buf ops num of atoms */
-    int* cxy_na;
+    int* cxy_na = nullptr;
      /*! \brief number of elements in cxy_na */
-    int ncxy_na;
+    int ncxy_na = 0;
      /*! \brief number of elements allocated allocated in device buffer */
-    int ncxy_na_alloc;
+    int ncxy_na_alloc = 0;
      /*! \brief x buf ops cell index mapping */
-    int* cxy_ind;
+    int* cxy_ind = nullptr;
      /*! \brief number of elements in cxy_ind */
-    int ncxy_ind;
+    int ncxy_ind = 0;
      /*! \brief number of elements allocated allocated in device buffer */
-    int ncxy_ind_alloc;
+    int ncxy_ind_alloc = 0;
      /*! \brief parameters required for the non-bonded calc. */
-    cu_nbparam_t* nbparam;
+    cu_nbparam_t* nbparam = nullptr;
      /*! \brief pair-list data structures (local and non-local) */
-    gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist;
+    gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist = { { nullptr } };
      /*! \brief staging area where fshift/energies get downloaded */
      nb_staging_t nbst;
      /*! \brief local and non-local GPU streams */
-    gmx::EnumerationArray<Nbnxm::InteractionLocality, cudaStream_t> stream;
+    gmx::EnumerationArray<Nbnxm::InteractionLocality, cudaStream_t> stream = { { nullptr } };
  
      /*! \brief Events used for synchronization */
      /*! \{ */
      /*! \brief Event triggered when the non-local non-bonded
       * kernel is done (and the local transfer can proceed) */
-    cudaEvent_t nonlocal_done;
+    cudaEvent_t nonlocal_done = nullptr;
      /*! \brief Event triggered when the tasks issued in the local
       * stream that need to precede the non-local force or buffer
       * operation calculations are done (e.g. f buffer 0-ing, local
       * x/q H2D, buffer op initialization in local stream that is
       * required also by nonlocal stream ) */
-    cudaEvent_t misc_ops_and_local_H2D_done;
+    cudaEvent_t misc_ops_and_local_H2D_done = nullptr;
      /*! \} */
  
      /*! \brief True if there is work for the current domain in the
@@ -329,7 +328,7 @@ struct gmx_nbnxm_gpu_t
       * domain. As long as bonded work is not split up into
       * local/nonlocal, if there is bonded GPU work, both flags
       * will be true. */
-    gmx::EnumerationArray<Nbnxm::InteractionLocality, bool> haveWork;
+    gmx::EnumerationArray<Nbnxm::InteractionLocality, bool> haveWork = { { false } };
  
      /*! \brief Pointer to event synchronizer triggered when the local
       * GPU buffer ops / reduction is complete
@@ -337,22 +336,22 @@ struct gmx_nbnxm_gpu_t
       * \note That the synchronizer is managed outside of this module
       * in StatePropagatorDataGpu.
       */
-    GpuEventSynchronizer* localFReductionDone;
+    GpuEventSynchronizer* localFReductionDone = nullptr;
  
      /*! \brief Event triggered when non-local coordinate buffer
       * has been copied from device to host. */
-    GpuEventSynchronizer* xNonLocalCopyD2HDone;
+    GpuEventSynchronizer* xNonLocalCopyD2HDone = nullptr;
  
      /* NOTE: With current CUDA versions (<=5.0) timing doesn't work with multiple
       * concurrent streams, so we won't time if both l/nl work is done on GPUs.
       * Timer init/uninit is still done even with timing off so only the condition
       * setting bDoTime needs to be change if this CUDA "feature" gets fixed. */
      /*! \brief True if event-based timing is enabled. */
-    bool bDoTime;
+    bool bDoTime = false;
      /*! \brief CUDA event-based timers. */
-    cu_timers_t* timers;
+    cu_timers_t* timers = nullptr;
      /*! \brief Timing data. TODO: deprecate this and query timers for accumulated data instead */
-    gmx_wallclock_gpu_nbnxn_t* timings;
+    gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
  };
  
  #endif /* NBNXN_CUDA_TYPES_H */
diff --git a/src/gromacs/nbnxm/gpu_common.h b/src/gromacs/nbnxm/gpu_common.h

index a8369ce2d29aaacb775feb9c9811ce63a0cff421..dcfd2f8fefdc9806ca805ce39f6a71dea875f101 100644 (file)
--- a/src/gromacs/nbnxm/gpu_common.h
+++ b/src/gromacs/nbnxm/gpu_common.h
@@ -124,9 +124,7 @@ static inline InteractionLocality gpuAtomToInteractionLocality(const AtomLocalit
  
  
  //NOLINTNEXTLINE(misc-definitions-in-headers)
-void setupGpuShortRangeWork(gmx_nbnxm_gpu_t*               nb,
-                            const gmx::GpuBonded*          gpuBonded,
-                            const gmx::InteractionLocality iLocality)
+void setupGpuShortRangeWork(NbnxmGpu* nb, const gmx::GpuBonded* gpuBonded, const gmx::InteractionLocality iLocality)
  {
      GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
  
@@ -146,13 +144,13 @@ void setupGpuShortRangeWork(gmx_nbnxm_gpu_t*               nb,
   * \param[inout]  nb        Pointer to the nonbonded GPU data structure
   * \param[in]     iLocality Interaction locality identifier
   */
-static bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t& nb, const gmx::InteractionLocality iLocality)
+static bool haveGpuShortRangeWork(const NbnxmGpu& nb, const gmx::InteractionLocality iLocality)
  {
      return nb.haveWork[iLocality];
  }
  
  //NOLINTNEXTLINE(misc-definitions-in-headers)
-bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t* nb, const gmx::AtomLocality aLocality)
+bool haveGpuShortRangeWork(const NbnxmGpu* nb, const gmx::AtomLocality aLocality)
  {
      GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
  
@@ -362,7 +360,7 @@ static inline void gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t* timings,
   * \todo Move into shared source file with gmx_compile_cpp_as_cuda
   */
  //NOLINTNEXTLINE(misc-definitions-in-headers)
-bool gpu_try_finish_task(gmx_nbnxm_gpu_t*         nb,
+bool gpu_try_finish_task(NbnxmGpu*                nb,
                           const gmx::StepWorkload& stepWork,
                           const AtomLocality       aloc,
                           real*                    e_lj,
@@ -458,7 +456,7 @@ bool gpu_try_finish_task(gmx_nbnxm_gpu_t*         nb,
   * \return            The number of cycles the gpu wait took
   */
  //NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file
-float gpu_wait_finish_task(gmx_nbnxm_gpu_t*         nb,
+float gpu_wait_finish_task(NbnxmGpu*                nb,
                             const gmx::StepWorkload& stepWork,
                             AtomLocality             aloc,
                             real*                    e_lj,
diff --git a/src/gromacs/nbnxm/gpu_common_utils.h b/src/gromacs/nbnxm/gpu_common_utils.h

index 176ab8f0457905e8eee5196fbad7621a5e84ba63..4882c3530ee147eb992079cc0d49f906c5ca102d 100644 (file)
--- a/src/gromacs/nbnxm/gpu_common_utils.h
+++ b/src/gromacs/nbnxm/gpu_common_utils.h
@@ -64,7 +64,7 @@ namespace Nbnxm
   * local part of the force array also depends on the non-local kernel.
   * The skip of the local kernel is taken care of separately.
   */
-static inline bool canSkipNonbondedWork(const gmx_nbnxm_gpu_t& nb, InteractionLocality iloc)
+static inline bool canSkipNonbondedWork(const NbnxmGpu& nb, InteractionLocality iloc)
  {
      assert(nb.plist[iloc]);
      return (iloc == InteractionLocality::NonLocal && nb.plist[iloc]->nsci == 0);
diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h

index 2f504e91f8d261abcd28e6c247c99301b05c2b68..30d44159cac0d592d5f0acd5f1a752cb4a531f39 100644 (file)
--- a/src/gromacs/nbnxm/gpu_data_mgmt.h
+++ b/src/gromacs/nbnxm/gpu_data_mgmt.h
@@ -50,7 +50,7 @@
  #include "gromacs/mdtypes/interaction_const.h"
  #include "gromacs/mdtypes/locality.h"
  
-struct gmx_nbnxm_gpu_t;
+struct NbnxmGpu;
  struct gmx_gpu_info_t;
  struct gmx_device_info_t;
  struct gmx_wallclock_gpu_nbnxn_t;
@@ -63,23 +63,23 @@ namespace Nbnxm
  
  /** Initializes the data structures related to GPU nonbonded calculations. */
  GPU_FUNC_QUALIFIER
-gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
-                          const interaction_const_t gmx_unused* ic,
-                          const PairlistParams gmx_unused& listParams,
-                          const nbnxn_atomdata_t gmx_unused* nbat,
-                          int gmx_unused rank,
-                          /* true if both local and non-local are done on GPU */
-                          gmx_bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr);
+NbnxmGpu* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
+                   const interaction_const_t gmx_unused* ic,
+                   const PairlistParams gmx_unused& listParams,
+                   const nbnxn_atomdata_t gmx_unused* nbat,
+                   int gmx_unused rank,
+                   /* true if both local and non-local are done on GPU */
+                   gmx_bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr);
  
  /** Initializes pair-list data for GPU, called at every pair search step. */
  GPU_FUNC_QUALIFIER
-void gpu_init_pairlist(gmx_nbnxm_gpu_t gmx_unused*   nb,
+void gpu_init_pairlist(NbnxmGpu gmx_unused*          nb,
                         const struct NbnxnPairlistGpu gmx_unused* h_nblist,
                         gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
  
  /** Initializes atom-data on the GPU, called at every pair search step. */
  GPU_FUNC_QUALIFIER
-void gpu_init_atomdata(gmx_nbnxm_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbat) GPU_FUNC_TERM;
+void gpu_init_atomdata(NbnxmGpu gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbat) GPU_FUNC_TERM;
  
  /*! \brief Re-generate the GPU Ewald force table, resets rlist, and update the
   *  electrostatic type switching to twin cut-off (or back) if needed.
@@ -90,19 +90,19 @@ void gpu_pme_loadbal_update_param(const struct nonbonded_verlet_t gmx_unused* nb
  
  /** Uploads shift vector to the GPU if the box is dynamic (otherwise just returns). */
  GPU_FUNC_QUALIFIER
-void gpu_upload_shiftvec(gmx_nbnxm_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbatom) GPU_FUNC_TERM;
+void gpu_upload_shiftvec(NbnxmGpu gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbatom) GPU_FUNC_TERM;
  
  /** Clears GPU outputs: nonbonded force, shift force and energy. */
  GPU_FUNC_QUALIFIER
-void gpu_clear_outputs(gmx_nbnxm_gpu_t gmx_unused* nb, bool gmx_unused computeVirial) GPU_FUNC_TERM;
+void gpu_clear_outputs(NbnxmGpu gmx_unused* nb, bool gmx_unused computeVirial) GPU_FUNC_TERM;
  
  /** Frees all GPU resources used for the nonbonded calculations. */
  GPU_FUNC_QUALIFIER
-void gpu_free(gmx_nbnxm_gpu_t gmx_unused* nb) GPU_FUNC_TERM;
+void gpu_free(NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM;
  
  /** Returns the GPU timings structure or NULL if GPU is not used or timing is off. */
  GPU_FUNC_QUALIFIER
-struct gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t gmx_unused* nb)
+struct gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu gmx_unused* nb)
          GPU_FUNC_TERM_WITH_RETURN(nullptr);
  
  /** Resets nonbonded GPU timings. */
@@ -112,37 +112,36 @@ void gpu_reset_timings(struct nonbonded_verlet_t gmx_unused* nbv) GPU_FUNC_TERM;
  /** Calculates the minimum size of proximity lists to improve SM load balance
   *  with GPU non-bonded kernels. */
  GPU_FUNC_QUALIFIER
-int gpu_min_ci_balanced(gmx_nbnxm_gpu_t gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
+int gpu_min_ci_balanced(NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
  
  /** Returns if analytical Ewald GPU kernels are used. */
  GPU_FUNC_QUALIFIER
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t gmx_unused* nb)
-        GPU_FUNC_TERM_WITH_RETURN(FALSE);
+gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(FALSE);
  
  /** Returns an opaque pointer to the GPU command stream
   *  Note: CUDA only.
   */
  CUDA_FUNC_QUALIFIER
-void* gpu_get_command_stream(gmx_nbnxm_gpu_t gmx_unused* nb, gmx::InteractionLocality gmx_unused iloc)
+void* gpu_get_command_stream(NbnxmGpu gmx_unused* nb, gmx::InteractionLocality gmx_unused iloc)
          CUDA_FUNC_TERM_WITH_RETURN(nullptr);
  
  /** Returns an opaque pointer to the GPU coordinate+charge array
   *  Note: CUDA only.
   */
  CUDA_FUNC_QUALIFIER
-void* gpu_get_xq(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+void* gpu_get_xq(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
  
  /** Returns an opaque pointer to the GPU force array
   *  Note: CUDA only.
   */
  CUDA_FUNC_QUALIFIER
-void* gpu_get_f(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+void* gpu_get_f(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
  
  /** Returns an opaque pointer to the GPU shift force array
   *  Note: CUDA only.
   */
  CUDA_FUNC_QUALIFIER
-rvec* gpu_get_fshift(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+rvec* gpu_get_fshift(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
  
  } // namespace Nbnxm
  
diff --git a/src/gromacs/nbnxm/gpu_jit_support.h b/src/gromacs/nbnxm/gpu_jit_support.h

index 3a5928d2bb687c0d7b0e5f0965d49d6db893fac0..183fcadc7ca929991308dbd4299be938d567d60d 100644 (file)
--- a/src/gromacs/nbnxm/gpu_jit_support.h
+++ b/src/gromacs/nbnxm/gpu_jit_support.h
@@ -46,9 +46,9 @@
  
  #include "gromacs/utility/basedefinitions.h"
  
-struct gmx_nbnxm_gpu_t;
+struct NbnxmGpu;
  
  /*! \brief Handles any JIT compilation of nbnxn kernels for the selected device */
-OPENCL_FUNC_QUALIFIER void nbnxn_gpu_compile_kernels(gmx_nbnxm_gpu_t gmx_unused* nb) OPENCL_FUNC_TERM;
+OPENCL_FUNC_QUALIFIER void nbnxn_gpu_compile_kernels(NbnxmGpu gmx_unused* nb) OPENCL_FUNC_TERM;
  
  #endif
diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h

index c820e5772696677ae781dc1a6ed0c0ac9d200d83..4fc235ca4f179ab2233c9510e5c67a77ad27e5be 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm.h
+++ b/src/gromacs/nbnxm/nbnxm.h
@@ -125,7 +125,7 @@ struct gmx_domdec_zones_t;
  struct gmx_enerdata_t;
  struct gmx_hw_info_t;
  struct gmx_mtop_t;
-struct gmx_nbnxm_gpu_t;
+struct NbnxmGpu;
  struct gmx_wallcycle;
  struct interaction_const_t;
  struct nbnxn_atomdata_t;
@@ -225,7 +225,7 @@ public:
                         std::unique_ptr<PairSearch>       pairSearch,
                         std::unique_ptr<nbnxn_atomdata_t> nbat,
                         const Nbnxm::KernelSetup&         kernelSetup,
-                       gmx_nbnxm_gpu_t*                  gpu_nbv,
+                       NbnxmGpu*                         gpu_nbv,
                         gmx_wallcycle*                    wcycle);
  
      ~nonbonded_verlet_t();
@@ -403,7 +403,7 @@ private:
  
  public:
      //! GPU Nbnxm data, only used with a physical GPU (TODO: use unique_ptr)
-    gmx_nbnxm_gpu_t* gpu_nbv;
+    NbnxmGpu* gpu_nbv;
  };
  
  namespace Nbnxm
diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h

index adbbcf7f0caaf3b0a4f8e05c13da8f3bb107164e..7b9e4b80f9f1df63ebab8cc59a0fdb7ba983244f 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_gpu.h
+++ b/src/gromacs/nbnxm/nbnxm_gpu.h
@@ -78,7 +78,7 @@ class Grid;
   * \param [in]    aloc      Atom locality flag.
   */
  GPU_FUNC_QUALIFIER
-void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t gmx_unused*   nb,
+void gpu_copy_xq_to_gpu(NbnxmGpu gmx_unused*          nb,
                          const struct nbnxn_atomdata_t gmx_unused* nbdata,
                          gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
  
@@ -93,7 +93,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t gmx_unused*   nb,
   *
   */
  GPU_FUNC_QUALIFIER
-void gpu_launch_kernel(gmx_nbnxm_gpu_t gmx_unused* nb,
+void gpu_launch_kernel(NbnxmGpu gmx_unused*    nb,
                         const gmx::StepWorkload gmx_unused& stepWork,
                         gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
  
@@ -133,7 +133,7 @@ void gpu_launch_kernel(gmx_nbnxm_gpu_t gmx_unused* nb,
   * \param [in]    numParts  Number of parts the pair list is split into in the rolling kernel.
   */
  GPU_FUNC_QUALIFIER
-void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t gmx_unused* nb,
+void gpu_launch_kernel_pruneonly(NbnxmGpu gmx_unused*     nb,
                                   gmx::InteractionLocality gmx_unused iloc,
                                   int gmx_unused numParts) GPU_FUNC_TERM;
  
@@ -142,7 +142,7 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t gmx_unused* nb,
   * (and energies/shift forces if required).
   */
  GPU_FUNC_QUALIFIER
-void gpu_launch_cpyback(gmx_nbnxm_gpu_t gmx_unused* nb,
+void gpu_launch_cpyback(NbnxmGpu gmx_unused* nb,
                          nbnxn_atomdata_t gmx_unused* nbatom,
                          const gmx::StepWorkload gmx_unused& stepWork,
                          gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
@@ -185,7 +185,7 @@ void gpu_launch_cpyback(gmx_nbnxm_gpu_t gmx_unused* nb,
   * \returns                   True if the nonbonded tasks associated with \p aloc locality have completed
   */
  GPU_FUNC_QUALIFIER
-bool gpu_try_finish_task(gmx_nbnxm_gpu_t gmx_unused* nb,
+bool gpu_try_finish_task(NbnxmGpu gmx_unused*    nb,
                           const gmx::StepWorkload gmx_unused& stepWork,
                           gmx::AtomLocality gmx_unused aloc,
                           real gmx_unused* e_lj,
@@ -209,7 +209,7 @@ bool gpu_try_finish_task(gmx_nbnxm_gpu_t gmx_unused* nb,
   * \param[out] shiftForces Shift forces buffer to accumulate into
   * \param[out] wcycle         Pointer to wallcycle data structure               */
  GPU_FUNC_QUALIFIER
-float gpu_wait_finish_task(gmx_nbnxm_gpu_t gmx_unused* nb,
+float gpu_wait_finish_task(NbnxmGpu gmx_unused*    nb,
                             const gmx::StepWorkload gmx_unused& stepWork,
                             gmx::AtomLocality gmx_unused aloc,
                             real gmx_unused* e_lj,
@@ -226,7 +226,7 @@ int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic)
   * Called on the NS step and performs (re-)allocations and memory copies. !*/
  CUDA_FUNC_QUALIFIER
  void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet,
-                                gmx_nbnxm_gpu_t gmx_unused* gpu_nbv) CUDA_FUNC_TERM;
+                                NbnxmGpu gmx_unused* gpu_nbv) CUDA_FUNC_TERM;
  
  /*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
   *
@@ -242,7 +242,7 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet,
  CUDA_FUNC_QUALIFIER
  void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
                             bool gmx_unused setFillerCoords,
-                           gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
+                           NbnxmGpu gmx_unused* gpu_nbv,
                             DeviceBuffer<float> gmx_unused d_x,
                             GpuEventSynchronizer gmx_unused* xReadyOnDevice,
                             gmx::AtomLocality gmx_unused locality,
@@ -254,7 +254,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
   * \param[in] interactionLocality  Local or NonLocal sync point
   */
  CUDA_FUNC_QUALIFIER
-void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t gmx_unused* nb,
+void nbnxnInsertNonlocalGpuDependency(const NbnxmGpu gmx_unused* nb,
                                        gmx::InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM;
  
  /*! \brief Set up internal flags that indicate what type of short-range work there is.
@@ -270,7 +270,7 @@ void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t gmx_unused* nb,
   * \param[in]     iLocality  Interaction locality identifier
   */
  GPU_FUNC_QUALIFIER
-void setupGpuShortRangeWork(gmx_nbnxm_gpu_t gmx_unused* nb,
+void setupGpuShortRangeWork(NbnxmGpu gmx_unused* nb,
                              const gmx::GpuBonded gmx_unused* gpuBonded,
                              gmx::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM;
  
@@ -284,13 +284,13 @@ void setupGpuShortRangeWork(gmx_nbnxm_gpu_t gmx_unused* nb,
   * \param[in]     aLocality Atom locality identifier
   */
  GPU_FUNC_QUALIFIER
-bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality)
+bool haveGpuShortRangeWork(const NbnxmGpu gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality)
          GPU_FUNC_TERM_WITH_RETURN(false);
  
  /*! \brief Initialization for F buffer operations on GPU */
  CUDA_FUNC_QUALIFIER
  void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused* cell,
-                                    gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
+                                    NbnxmGpu gmx_unused* gpu_nbv,
                                      int gmx_unused       natoms_total,
                                      GpuEventSynchronizer gmx_unused* localReductionDone) CUDA_FUNC_TERM;
  
@@ -313,7 +313,7 @@ void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused* cell,
  CUDA_FUNC_QUALIFIER
  void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
                                 DeviceBuffer<float> gmx_unused totalForcesDevice,
-                               gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
+                               NbnxmGpu gmx_unused* gpu_nbv,
                                 void gmx_unused*                           pmeForcesDevice,
                                 gmx::ArrayRef<GpuEventSynchronizer* const> gmx_unused dependencyList,
                                 int gmx_unused atomStart,
@@ -325,7 +325,7 @@ void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
   * \param[in] nb                   The nonbonded data GPU structure
   */
  CUDA_FUNC_QUALIFIER
-void nbnxn_wait_x_on_device(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
+void nbnxn_wait_x_on_device(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM;
  
  } // namespace Nbnxm
  #endif
diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp

index 745414b67d5209de070969cad624f556be0e65f4..35aea4ae3dcdfbd6ea5349e10ee58b0b34b6455e 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_setup.cpp
+++ b/src/gromacs/nbnxm/nbnxm_setup.cpp
@@ -320,7 +320,7 @@ namespace Nbnxm
  {
  
  /*! \brief Gets and returns the minimum i-list count for balancing based on the GPU used or env.var. when set */
-static int getMinimumIlistCountForGpuBalancing(gmx_nbnxm_gpu_t* nbnxmGpu)
+static int getMinimumIlistCountForGpuBalancing(NbnxmGpu* nbnxmGpu)
  {
      int minimumIlistCount;
  
@@ -440,8 +440,8 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
                          fr->nbfp, mimimumNumEnergyGroupNonbonded,
                          (useGpu || emulateGpu) ? 1 : gmx_omp_nthreads_get(emntNonbonded));
  
-    gmx_nbnxm_gpu_t* gpu_nbv                          = nullptr;
-    int              minimumIlistCountForGpuBalancing = 0;
+    NbnxmGpu* gpu_nbv                          = nullptr;
+    int       minimumIlistCountForGpuBalancing = 0;
      if (useGpu)
      {
          /* init the NxN GPU data; the last argument tells whether we'll have
@@ -469,7 +469,7 @@ nonbonded_verlet_t::nonbonded_verlet_t(std::unique_ptr<PairlistSets>     pairlis
                                         std::unique_ptr<PairSearch>       pairSearch,
                                         std::unique_ptr<nbnxn_atomdata_t> nbat_in,
                                         const Nbnxm::KernelSetup&         kernelSetup,
-                                       gmx_nbnxm_gpu_t*                  gpu_nbv_ptr,
+                                       NbnxmGpu*                         gpu_nbv_ptr,
                                         gmx_wallcycle*                    wcycle) :
      pairlistSets_(std::move(pairlistSets)),
      pairSearch_(std::move(pairSearch)),
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp

index c795937a8441dbacd40faa246dcbf22eee1129fc..f4c291ce7cd9d24d862a4144cab17bd4c5768c52 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
@@ -341,7 +341,7 @@ static inline cl_kernel selectPruneKernel(cl_kernel kernel_pruneonly[], bool fir
   *  OpenCL kernel objects are cached in nb. If the requested kernel is not
   *  found in the cache, it will be created and the cache will be updated.
   */
-static inline cl_kernel select_nbnxn_kernel(gmx_nbnxm_gpu_t* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune)
+static inline cl_kernel select_nbnxn_kernel(NbnxmGpu* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune)
  {
      const char* kernel_name_to_run;
      cl_kernel*  kernel_ptr;
@@ -471,7 +471,7 @@ static void sync_ocl_event(cl_command_queue stream, cl_event* ocl_event)
  }
  
  /*! \brief Launch asynchronously the xq buffer host to device copy. */
-void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
+void gpu_copy_xq_to_gpu(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
  {
      GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
  
@@ -575,7 +575,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, con
     misc_ops_done event to record the point in time when the above  operations
     are finished and synchronize with this event in the non-local stream.
   */
-void gpu_launch_kernel(gmx_nbnxm_gpu_t* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
+void gpu_launch_kernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
  {
      cl_atomdata_t*   adat   = nb->atdat;
      cl_nbparam_t*    nbp    = nb->nbparam;
@@ -713,7 +713,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
   * Launch the pairlist prune only kernel for the given locality.
   * \p numParts tells in how many parts, i.e. calls the list will be pruned.
   */
-void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc, const int numParts)
+void gpu_launch_kernel_pruneonly(NbnxmGpu* nb, const InteractionLocality iloc, const int numParts)
  {
      cl_atomdata_t*   adat    = nb->atdat;
      cl_nbparam_t*    nbp     = nb->nbparam;
@@ -839,7 +839,7 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality
   * Launch asynchronously the download of nonbonded forces from the GPU
   * (and energies/shift forces if required).
   */
-void gpu_launch_cpyback(gmx_nbnxm_gpu_t*         nb,
+void gpu_launch_cpyback(NbnxmGpu*                nb,
                          struct nbnxn_atomdata_t* nbatom,
                          const gmx::StepWorkload& stepWork,
                          const AtomLocality       aloc)
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp

index 4943a8e0dd50afe97fcd040375e29ed4d9a2e4e4..f8822ae31d5f44a9c047a7df9f7752a6037f920c 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -170,7 +170,7 @@ static void init_atomdata_first(cl_atomdata_t* ad, int ntypes, gmx_device_runtim
  
      /* An element of the fshift device buffer has the same size as one element
         of the host side fshift buffer. */
-    ad->fshift_elem_size = sizeof(*cl_nb_staging_t::fshift);
+    ad->fshift_elem_size = sizeof(*nb_staging_t::fshift);
  
      ad->fshift = clCreateBuffer(runData->context, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY,
                                  SHIFTS * ad->fshift_elem_size, nullptr, &cl_error);
@@ -407,8 +407,8 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti
      {
          return;
      }
-    gmx_nbnxm_gpu_t* nb  = nbv->gpu_nbv;
-    cl_nbparam_t*    nbp = nb->nbparam;
+    NbnxmGpu*     nb  = nbv->gpu_nbv;
+    cl_nbparam_t* nbp = nb->nbparam;
  
      set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
  
@@ -527,7 +527,7 @@ static void nbnxn_gpu_create_context(gmx_device_runtime_data_t* runtimeData,
  }
  
  /*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */
-static cl_kernel nbnxn_gpu_create_kernel(gmx_nbnxm_gpu_t* nb, const char* kernel_name)
+static cl_kernel nbnxn_gpu_create_kernel(NbnxmGpu* nb, const char* kernel_name)
  {
      cl_kernel kernel;
      cl_int    cl_error;
@@ -544,7 +544,7 @@ static cl_kernel nbnxn_gpu_create_kernel(gmx_nbnxm_gpu_t* nb, const char* kernel
  
  /*! \brief Clears nonbonded shift force output array and energy outputs on the GPU.
   */
-static void nbnxn_ocl_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
+static void nbnxn_ocl_clear_e_fshift(NbnxmGpu* nb)
  {
  
      cl_int           cl_error;
@@ -577,7 +577,7 @@ static void nbnxn_ocl_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
  }
  
  /*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */
-static void nbnxn_gpu_init_kernels(gmx_nbnxm_gpu_t* nb)
+static void nbnxn_gpu_init_kernels(NbnxmGpu* nb)
  {
      /* Init to 0 main kernel arrays */
      /* They will be later on initialized in select_nbnxn_kernel */
@@ -606,7 +606,7 @@ static void nbnxn_gpu_init_kernels(gmx_nbnxm_gpu_t* nb)
   *  Initializes members of the atomdata and nbparam structs and
   *  clears e/fshift output buffers.
   */
-static void nbnxn_ocl_init_const(gmx_nbnxm_gpu_t*                nb,
+static void nbnxn_ocl_init_const(NbnxmGpu*                       nb,
                                   const interaction_const_t*      ic,
                                   const PairlistParams&           listParams,
                                   const nbnxn_atomdata_t::Params& nbatParams)
@@ -617,20 +617,19 @@ static void nbnxn_ocl_init_const(gmx_nbnxm_gpu_t*                nb,
  
  
  //! This function is documented in the header file
-gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t*   deviceInfo,
-                          const interaction_const_t* ic,
-                          const PairlistParams&      listParams,
-                          const nbnxn_atomdata_t*    nbat,
-                          const int                  rank,
-                          const gmx_bool             bLocalAndNonlocal)
+NbnxmGpu* gpu_init(const gmx_device_info_t*   deviceInfo,
+                   const interaction_const_t* ic,
+                   const PairlistParams&      listParams,
+                   const nbnxn_atomdata_t*    nbat,
+                   const int                  rank,
+                   const gmx_bool             bLocalAndNonlocal)
  {
-    gmx_nbnxm_gpu_t*            nb;
      cl_int                      cl_error;
      cl_command_queue_properties queue_properties;
  
      assert(ic);
  
-    snew(nb, 1);
+    auto nb = new NbnxmGpu;
      snew(nb->atdat, 1);
      snew(nb->nbparam, 1);
      snew(nb->plist[InteractionLocality::Local], 1);
@@ -728,7 +727,7 @@ gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t*   deviceInfo,
  
  /*! \brief Clears the first natoms_clear elements of the GPU nonbonded force output array.
   */
-static void nbnxn_ocl_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
+static void nbnxn_ocl_clear_f(NbnxmGpu* nb, int natoms_clear)
  {
      if (natoms_clear == 0)
      {
@@ -748,7 +747,7 @@ static void nbnxn_ocl_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
  }
  
  //! This function is documented in the header file
-void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
+void gpu_clear_outputs(NbnxmGpu* nb, bool computeVirial)
  {
      nbnxn_ocl_clear_f(nb, nb->atdat->natoms);
      /* clear shift force array and energies if the outputs were
@@ -765,7 +764,7 @@ void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
  }
  
  //! This function is documented in the header file
-void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
+void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
  {
      char sbuf[STRLEN];
      // Timing accumulation should happen only if there was work to do
@@ -826,7 +825,7 @@ void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, con
  }
  
  //! This function is documented in the header file
-void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
+void gpu_upload_shiftvec(NbnxmGpu* nb, const nbnxn_atomdata_t* nbatom)
  {
      cl_atomdata_t*   adat = nb->atdat;
      cl_command_queue ls   = nb->stream[InteractionLocality::Local];
@@ -841,7 +840,7 @@ void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
  }
  
  //! This function is documented in the header file
-void gpu_init_atomdata(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbat)
+void gpu_init_atomdata(NbnxmGpu* nb, const nbnxn_atomdata_t* nbat)
  {
      cl_int           cl_error;
      int              nalloc, natoms;
@@ -1001,7 +1000,7 @@ static void free_gpu_device_runtime_data(gmx_device_runtime_data_t* runData)
  }
  
  //! This function is documented in the header file
-void gpu_free(gmx_nbnxm_gpu_t* nb)
+void gpu_free(NbnxmGpu* nb)
  {
      if (nb == nullptr)
      {
@@ -1093,7 +1092,7 @@ void gpu_free(gmx_nbnxm_gpu_t* nb)
      /* Free timers and timings */
      delete nb->timers;
      sfree(nb->timings);
-    sfree(nb);
+    delete nb;
  
      if (debug)
      {
@@ -1102,7 +1101,7 @@ void gpu_free(gmx_nbnxm_gpu_t* nb)
  }
  
  //! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t* nb)
+gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb)
  {
      return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
  }
@@ -1117,13 +1116,13 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv)
  }
  
  //! This function is documented in the header file
-int gpu_min_ci_balanced(gmx_nbnxm_gpu_t* nb)
+int gpu_min_ci_balanced(NbnxmGpu* nb)
  {
      return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0;
  }
  
  //! This function is documented in the header file
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
  {
      return ((nb->nbparam->eeltype == eelOclEWALD_ANA) || (nb->nbparam->eeltype == eelOclEWALD_ANA_TWIN));
  }
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp

index 3ea5cc186df8f33c38837c98f29dee13c91f4c80..dba641514997be022b746940bd7e8102d0f2d494 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
@@ -167,7 +167,7 @@ static std::string makeDefinesForKernelTypes(bool bFastGen, int eeltype, int vdw
   *
   * Does not throw
   */
-void nbnxn_gpu_compile_kernels(gmx_nbnxm_gpu_t* nb)
+void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
  {
      gmx_bool   bFastGen = TRUE;
      cl_program program  = nullptr;
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h

index c6f5636658e86bb5e022c680c6c1bb99dde6307a..a9379eea8f9307c1ca554f09be829e947d0f9545 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h
@@ -60,6 +60,8 @@
  
  #include "nbnxm_ocl_consts.h"
  
+struct gmx_wallclock_gpu_nbnxn_t;
+
  /* kernel does #include "gromacs/math/utilities.h" */
  /* Move the actual useful stuff here: */
  
@@ -151,15 +153,15 @@ enum ePruneKind
   *  The energies/shift forces get downloaded here first, before getting added
   *  to the CPU-side aggregate values.
   */
-typedef struct cl_nb_staging
+struct nb_staging_t
  {
      //! LJ energy
-    float* e_lj;
+    float* e_lj = nullptr;
      //! electrostatic energy
-    float* e_el;
+    float* e_el = nullptr;
      //! float3 buffer with shift forces
-    float (*fshift)[3];
-} cl_nb_staging_t;
+    float (*fshift)[3] = nullptr;
+};
  
  /*! \internal
   * \brief Nonbonded atom data - both inputs and outputs.
@@ -204,7 +206,7 @@ typedef struct cl_atomdata
      size_t shift_vec_elem_size;
  
      //! true if the shift vector has been uploaded
-    cl_bool bShiftVecUploaded;
+    bool bShiftVecUploaded;
  } cl_atomdata_t;
  
  /*! \internal
@@ -333,48 +335,48 @@ typedef struct Nbnxm::gpu_timers_t cl_timers_t;
  /*! \internal
   * \brief Main data structure for OpenCL nonbonded force calculations.
   */
-struct gmx_nbnxm_gpu_t
+struct NbnxmGpu
  {
      //! OpenCL device information
-    const gmx_device_info_t* dev_info;
+    const gmx_device_info_t* dev_info = nullptr;
      //! OpenCL runtime data (context, kernels)
-    struct gmx_device_runtime_data_t* dev_rundata;
+    struct gmx_device_runtime_data_t* dev_rundata = nullptr;
  
      /**< Pointers to non-bonded kernel functions
       * organized similarly to the nb_kfunc_xxx arrays in nbnxn_ocl.cpp */
      ///@{
-    cl_kernel kernel_noener_noprune_ptr[eelOclNR][evdwOclNR];
-    cl_kernel kernel_ener_noprune_ptr[eelOclNR][evdwOclNR];
-    cl_kernel kernel_noener_prune_ptr[eelOclNR][evdwOclNR];
-    cl_kernel kernel_ener_prune_ptr[eelOclNR][evdwOclNR];
+    cl_kernel kernel_noener_noprune_ptr[eelOclNR][evdwOclNR] = { { nullptr } };
+    cl_kernel kernel_ener_noprune_ptr[eelOclNR][evdwOclNR]   = { { nullptr } };
+    cl_kernel kernel_noener_prune_ptr[eelOclNR][evdwOclNR]   = { { nullptr } };
+    cl_kernel kernel_ener_prune_ptr[eelOclNR][evdwOclNR]     = { { nullptr } };
      ///@}
      //! prune kernels; ePruneKind defines the kernel kinds
-    cl_kernel kernel_pruneonly[ePruneNR];
+    cl_kernel kernel_pruneonly[ePruneNR] = { nullptr };
  
      //! true if prefetching fg i-atom LJ parameters should be used in the kernels
-    bool bPrefetchLjParam;
+    bool bPrefetchLjParam = false;
  
      /**< auxiliary kernels implementing memset-like functions */
      ///@{
-    cl_kernel kernel_memset_f;
-    cl_kernel kernel_memset_f2;
-    cl_kernel kernel_memset_f3;
-    cl_kernel kernel_zero_e_fshift;
+    cl_kernel kernel_memset_f      = nullptr;
+    cl_kernel kernel_memset_f2     = nullptr;
+    cl_kernel kernel_memset_f3     = nullptr;
+    cl_kernel kernel_zero_e_fshift = nullptr;
      ///@}
  
      //! true if doing both local/non-local NB work on GPU
-    cl_bool bUseTwoStreams;
+    bool bUseTwoStreams = false;
      //! true indicates that the nonlocal_done event was enqueued
-    cl_bool bNonLocalStreamActive;
+    bool bNonLocalStreamActive = false;
  
      //! atom data
-    cl_atomdata_t* atdat;
+    cl_atomdata_t* atdat = nullptr;
      //! parameters required for the non-bonded calc.
-    cl_nbparam_t* nbparam;
+    cl_nbparam_t* nbparam = nullptr;
      //! pair-list data structures (local and non-local)
-    gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_plist_t*> plist;
+    gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_plist_t*> plist = { nullptr };
      //! staging area where fshift/energies get downloaded
-    cl_nb_staging_t nbst;
+    nb_staging_t nbst;
  
      //! local and non-local GPU queues
      gmx::EnumerationArray<Nbnxm::InteractionLocality, cl_command_queue> stream;
@@ -383,13 +385,13 @@ struct gmx_nbnxm_gpu_t
      /*! \{ */
      /*! \brief Event triggered when the non-local non-bonded
       * kernel is done (and the local transfer can proceed) */
-    cl_event nonlocal_done;
+    cl_event nonlocal_done = nullptr;
      /*! \brief Event triggered when the tasks issued in the local
       * stream that need to precede the non-local force or buffer
       * operation calculations are done (e.g. f buffer 0-ing, local
       * x/q H2D, buffer op initialization in local stream that is
       * required also by nonlocal stream ) */
-    cl_event misc_ops_and_local_H2D_done;
+    cl_event misc_ops_and_local_H2D_done = nullptr;
      /*! \} */
  
      //! True if there has been local/nonlocal GPU work, either bonded or nonbonded, scheduled
@@ -399,11 +401,11 @@ struct gmx_nbnxm_gpu_t
  
  
      //! True if event-based timing is enabled.
-    cl_bool bDoTime;
+    bool bDoTime = false;
      //! OpenCL event-based timers.
-    cl_timers_t* timers;
+    cl_timers_t* timers = nullptr;
      //! Timing data. TODO: deprecate this and query timers for accumulated data instead
-    struct gmx_wallclock_gpu_nbnxn_t* timings;
+    gmx_wallclock_gpu_nbnxn_t* timings = nullptr;
  };
  
  #endif /* NBNXN_OPENCL_TYPES_H */
author	Mark Abraham <mark.j.abraham@gmail.com>
	Sun, 26 Jan 2020 15:33:45 +0000 (16:33 +0100)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Mon, 27 Jan 2020 07:39:47 +0000 (08:39 +0100)
docs/doxygen/suppressions.txt		patch \| blob \| history
src/gromacs/nbnxm/atomdata.cpp		patch \| blob \| history
src/gromacs/nbnxm/atomdata.h		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_common.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_common_utils.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_data_mgmt.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_jit_support.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_gpu.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_setup.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h		patch \| blob \| history