Make nbnxm headers more self-contained

author Mark Abraham <mark.j.abraham@gmail.com>

Tue, 14 Jan 2020 15:53:26 +0000 (16:53 +0100)

committer Paul Bauer <paul.bauer.q@gmail.com>

Thu, 16 Jan 2020 10:55:02 +0000 (11:55 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Tue, 14 Jan 2020 15:53:26 +0000 (16:53 +0100)
committer Paul Bauer <paul.bauer.q@gmail.com>
Thu, 16 Jan 2020 10:55:02 +0000 (11:55 +0100)
diff --git a/docs/doxygen/suppressions.txt b/docs/doxygen/suppressions.txt

index ab296f52508c7c81af01b4605e9b1708e00fc9e4..8f3a2dd4798272afd8dc918ea835c7eee67890c1 100644 (file)
--- a/docs/doxygen/suppressions.txt
+++ b/docs/doxygen/suppressions.txt
@@ -34,6 +34,9 @@ src/gromacs/nbnxm/pairlist_simd_4xm.h: warning: should include "simd.h"
  src/gromacs/nbnxm/kernels_simd_2xmm/kernel_common.h: warning: should include "nbnxm_simd.h"
  src/gromacs/nbnxm/kernels_simd_4xm/kernel_common.h: warning: should include "nbnxm_simd.h"
  
+# This seems to be a false positive
+src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h: error: gmx_nbnxm_gpu_t: is in internal file(s), but appears in public documentation
+
  # Temporary while we change the SIMD implementation
  src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace_common.h: warning: should include "simd.h"
  
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp

index 934714a2a00f194c2018aaf1e38970ef559a1dfe..868ef05a4a46db867b747abf1a661db99cf55094 100644 (file)
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -91,6 +91,7 @@
  #include "gromacs/mdtypes/state_propagator_data_gpu.h"
  #include "gromacs/nbnxm/gpu_data_mgmt.h"
  #include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_gpu.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp

index 07684c90092d0db2957748c738695f5e1252490c..8fd60a8f03e6bd7d52d42dc439a53ef278e22496 100644 (file)
--- a/src/gromacs/nbnxm/atomdata.cpp
+++ b/src/gromacs/nbnxm/atomdata.cpp
@@ -2,7 +2,7 @@
   * This file is part of the GROMACS molecular simulation package.
   *
   * Copyright (c) 2012-2018, The GROMACS development team.
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -67,6 +67,7 @@
  #include "grid.h"
  #include "gridset.h"
  #include "nbnxm_geometry.h"
+#include "nbnxm_gpu.h"
  #include "pairlist.h"
  
  using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
@@ -1073,7 +1074,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet&   gridSet,
  void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet&   gridSet,
                                      const gmx::AtomLocality locality,
                                      bool                    fillLocal,
-                                    gmx_nbnxn_gpu_t*        gpu_nbv,
+                                    gmx_nbnxm_gpu_t*        gpu_nbv,
                                      DeviceBuffer<float>     d_x,
                                      GpuEventSynchronizer*   xReadyOnDevice)
  {
@@ -1462,7 +1463,7 @@ void reduceForcesGpu(const gmx::AtomLocality                    locality,
                       const Nbnxm::GridSet&                      gridSet,
                       void*                                      pmeForcesDevice,
                       gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
-                     gmx_nbnxn_gpu_t*                           gpu_nbv,
+                     gmx_nbnxm_gpu_t*                           gpu_nbv,
                       bool                                       useGpuFPmeReduction,
                       bool                                       accumulateForce)
  {
diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h

index 4aa2f72f235faf7f8d72ea6b4113459458c1e44b..68c25e59331a9f04cc1fbedadd1e273382cb0a33 100644 (file)
--- a/src/gromacs/nbnxm/atomdata.h
+++ b/src/gromacs/nbnxm/atomdata.h
@@ -33,6 +33,15 @@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
+/*! \libinternal \file
+ *  \brief
+ *  Functionality for per-atom data in the nbnxm module
+ *
+ *  \author Berk Hess <hess@kth.se>
+ *  \ingroup module_nbnxm
+ *  \inlibraryapi
+ */
+
  
  #ifndef GMX_NBNXN_ATOMDATA_H
  #define GMX_NBNXN_ATOMDATA_H
@@ -47,13 +56,12 @@
  #include "gromacs/utility/bitmask.h"
  #include "gromacs/utility/real.h"
  
-#include "gpu_types.h"
-
  namespace gmx
  {
  class MDLogger;
  }
  
+struct gmx_nbnxm_gpu_t;
  struct nbnxn_atomdata_t;
  struct nonbonded_verlet_t;
  struct t_mdatoms;
@@ -67,7 +75,7 @@ class GridSet;
  enum class KernelType;
  } // namespace Nbnxm
  
-/* Convenience type for vector with aligned memory */
+//! Convenience type for vector with aligned memory
  template<typename T>
  using AlignedVector = std::vector<T, gmx::AlignedAllocator<T>>;
  
@@ -122,8 +130,9 @@ struct nbnxn_atomdata_output_t
      AlignedVector<real>   VSc;    // Temporary SIMD Coulomb group energy storage
  };
  
-/* Block size in atoms for the non-bonded thread force-buffer reduction,
- * should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
+/*! \brief Block size in atoms for the non-bonded thread force-buffer reduction.
+ *
+ * Should be a multiple of all cell and x86 SIMD sizes (i.e. 2, 4 and 8).
   * Should be small to reduce the reduction and zeroing cost,
   * but too small will result in overhead.
   * Currently the block size is NBNXN_BUFFERFLAG_SIZE*3*sizeof(real)=192 bytes.
@@ -134,20 +143,24 @@ struct nbnxn_atomdata_output_t
  #    define NBNXN_BUFFERFLAG_SIZE 16
  #endif
  
-/* We store the reduction flags as gmx_bitmask_t.
+/*! \brief We store the reduction flags as gmx_bitmask_t.
   * This limits the number of flags to BITMASK_SIZE.
   */
  #define NBNXN_BUFFERFLAG_MAX_THREADS (BITMASK_SIZE)
  
-/* Flags for telling if threads write to force output buffers */
+/*! \internal
+ * \brief Flags for telling if threads write to force output buffers */
  typedef struct
  {
-    int            nflag;       /* The number of flag blocks                         */
-    gmx_bitmask_t* flag;        /* Bit i is set when thread i writes to a cell-block */
-    int            flag_nalloc; /* Allocation size of cxy_flag                       */
+    //! The number of flag blocks
+    int nflag;
+    //! Bit i is set when thread i writes to a cell-block
+    gmx_bitmask_t* flag;
+    //! Allocation size of cxy_flag
+    int flag_nalloc;
  } nbnxn_buffer_flags_t;
  
-/* LJ combination rules: geometric, Lorentz-Berthelot, none */
+/*! \brief LJ combination rules: geometric, Lorentz-Berthelot, none */
  enum
  {
      ljcrGEOM,
@@ -156,7 +169,8 @@ enum
      ljcrNR
  };
  
-/* Struct that stores atom related data for the nbnxn module
+/*! \internal
+ * \brief Struct that stores atom related data for the nbnxn module
   *
   * Note: performance would improve slightly when all std::vector containers
   *       in this struct would not initialize during resize().
@@ -195,86 +209,103 @@ struct nbnxn_atomdata_t
          gmx::HostVector<int> energrp;
      };
  
-    // Diagonal and topology exclusion helper data for all SIMD kernels
+    /*! \internal
+     * \brief Diagonal and topology exclusion helper data for all SIMD kernels. */
      struct SimdMasks
      {
          SimdMasks();
  
-        // Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
+        //! Helper data for setting up diagonal exclusion masks in the SIMD 4xN kernels
          AlignedVector<real> diagonal_4xn_j_minus_i;
-        // Helper data for setting up diaginal exclusion masks in the SIMD 2xNN kernels
+        //! Helper data for setting up diagonal exclusion masks in the SIMD 2xNN kernels
          AlignedVector<real> diagonal_2xnn_j_minus_i;
-        // Filters for topology exclusion masks for the SIMD kernels
+        //! Filters for topology exclusion masks for the SIMD kernels
          AlignedVector<uint32_t> exclusion_filter;
-        // Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
+        //! Filters for topology exclusion masks for double SIMD kernels without SIMD int32 logical support
          AlignedVector<uint64_t> exclusion_filter64;
-        // Array of masks needed for exclusions
+        //! Array of masks needed for exclusions
          AlignedVector<real> interaction_array;
      };
  
-    /* Constructor
+    /*! \brief Constructor
       *
       * \param[in] pinningPolicy  Sets the pinning policy for all data that might be transfered to a GPU
       */
      nbnxn_atomdata_t(gmx::PinningPolicy pinningPolicy);
  
-    /* Returns a const reference to the parameters */
+    //! Returns a const reference to the parameters
      const Params& params() const { return params_; }
  
-    /* Returns a non-const reference to the parameters */
+    //! Returns a non-const reference to the parameters
      Params& paramsDeprecated() { return params_; }
  
-    /* Returns the current total number of atoms stored */
+    //! Returns the current total number of atoms stored
      int numAtoms() const { return numAtoms_; }
  
-    /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
+    //! Return the coordinate buffer, and q with xFormat==nbatXYZQ
      gmx::ArrayRef<const real> x() const { return x_; }
  
-    /* Return the coordinate buffer, and q with xFormat==nbatXYZQ */
+    //! Return the coordinate buffer, and q with xFormat==nbatXYZQ
      gmx::ArrayRef<real> x() { return x_; }
  
-    /* Resizes the coordinate buffer and sets the number of atoms */
+    //! Resizes the coordinate buffer and sets the number of atoms
      void resizeCoordinateBuffer(int numAtoms);
  
-    /* Resizes the force buffers for the current number of atoms */
+    //! Resizes the force buffers for the current number of atoms
      void resizeForceBuffers();
  
  private:
-    // The LJ and charge parameters
+    //! The LJ and charge parameters
      Params params_;
-    // The total number of atoms currently stored
+    //! The total number of atoms currently stored
      int numAtoms_;
  
  public:
-    int                        natoms_local; /* Number of local atoms                           */
-    int                        XFormat;     /* The format of x (and q), enum                      */
-    int                        FFormat;     /* The format of f, enum                              */
-    gmx_bool                   bDynamicBox; /* Do we need to update shift_vec every step?    */
-    gmx::HostVector<gmx::RVec> shift_vec;   /* Shift vectors, copied from t_forcerec              */
-    int                        xstride;     /* stride for a coordinate in x (usually 3 or 4)      */
-    int                        fstride;     /* stride for a coordinate in f (usually 3 or 4)      */
+    //! Number of local atoms
+    int natoms_local;
+    //! The format of x (and q), enum
+    int XFormat;
+    //! The format of f, enum
+    int FFormat;
+    //! Do we need to update shift_vec every step?
+    gmx_bool bDynamicBox;
+    //! Shift vectors, copied from t_forcerec
+    gmx::HostVector<gmx::RVec> shift_vec;
+    //! stride for a coordinate in x (usually 3 or 4)
+    int xstride;
+    //! stride for a coordinate in f (usually 3 or 4)
+    int fstride;
+
  private:
-    gmx::HostVector<real> x_; /* x and possibly q, size natoms*xstride              */
+    //! x and possibly q, size natoms*xstride
+    gmx::HostVector<real> x_;
  
  public:
-    // Masks for handling exclusions in the SIMD kernels
+    //! Masks for handling exclusions in the SIMD kernels
      const SimdMasks simdMasks;
  
-    /* Output data */
-    std::vector<nbnxn_atomdata_output_t> out; /* Output data structures, 1 per thread */
-
-    /* Reduction related data */
-    gmx_bool             bUseBufferFlags; /* Use the flags or operate on all atoms     */
-    nbnxn_buffer_flags_t buffer_flags;    /* Flags for buffer zeroing+reduc.  */
-    gmx_bool             bUseTreeReduce;  /* Use tree for force reduction */
-    tMPI_Atomic*         syncStep;        /* Synchronization step for tree reduce */
+    //! Output data structures, 1 per thread
+    std::vector<nbnxn_atomdata_output_t> out;
+
+    //! Reduction related data
+    //! \{
+    //! Use the flags or operate on all atoms
+    gmx_bool bUseBufferFlags;
+    //! Flags for buffer zeroing+reduc.
+    nbnxn_buffer_flags_t buffer_flags;
+    //! Use tree for force reduction
+    gmx_bool bUseTreeReduce;
+    //! Synchronization step for tree reduce
+    tMPI_Atomic* syncStep;
+    //! \}
  };
  
-/* Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
+/*! \brief Copy na rvec elements from x to xnb using nbatFormat, start dest a0,
   * and fills up to na_round with coordinates that are far away.
   */
  void copy_rvec_to_nbat_real(const int* a, int na, int na_round, const rvec* x, int nbatFormat, real* xnb, int a0);
  
+//! Describes the combination rule in use by this force field
  enum
  {
      enbnxninitcombruleDETECT,
@@ -283,7 +314,8 @@ enum
      enbnxninitcombruleNONE
  };
  
-/* Initialize the non-bonded atom data structure.
+/*! \brief Initialize the non-bonded atom data structure.
+ *
   * The enum for nbatXFormat is in the file defining nbnxn_atomdata_t.
   * Copy the ntypes*ntypes*2 sized nbfp non-bonded parameter list
   * to the atom data structure.
@@ -298,12 +330,13 @@ void nbnxn_atomdata_init(const gmx::MDLogger&      mdlog,
                           int                       n_energygroups,
                           int                       nout);
  
+//! Sets the atomdata after pair search
  void nbnxn_atomdata_set(nbnxn_atomdata_t*     nbat,
                          const Nbnxm::GridSet& gridSet,
                          const t_mdatoms*      mdatoms,
                          const int*            atinfo);
  
-/* Copy the shift vectors to nbat */
+//! Copy the shift vectors to nbat
  void nbnxn_atomdata_copy_shiftvec(gmx_bool dynamic_box, rvec* shift_vec, nbnxn_atomdata_t* nbat);
  
  /*! \brief Transform coordinates to xbat layout
@@ -337,7 +370,7 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet& gridSet,
  void nbnxn_atomdata_x_to_nbat_x_gpu(const Nbnxm::GridSet& gridSet,
                                      gmx::AtomLocality     locality,
                                      bool                  fillLocal,
-                                    gmx_nbnxn_gpu_t*      gpu_nbv,
+                                    gmx_nbnxm_gpu_t*      gpu_nbv,
                                      DeviceBuffer<float>   d_x,
                                      GpuEventSynchronizer* xReadyOnDevice);
  
@@ -366,14 +399,14 @@ void reduceForcesGpu(gmx::AtomLocality                          locality,
                       const Nbnxm::GridSet&                      gridSet,
                       void*                                      pmeForcesDevice,
                       gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
-                     gmx_nbnxn_gpu_t*                           gpu_nbv,
+                     gmx_nbnxm_gpu_t*                           gpu_nbv,
                       bool                                       useGpuFPmeReduction,
                       bool                                       accumulateForce);
  
-/* Add the fshift force stored in nbat to fshift */
+//! Add the fshift force stored in nbat to fshift
  void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t& nbat, gmx::ArrayRef<gmx::RVec> fshift);
  
-/* Get the atom start index and number of atoms for a given locality */
+//! Get the atom start index and number of atoms for a given locality
  void nbnxn_get_atom_range(gmx::AtomLocality     atomLocality,
                            const Nbnxm::GridSet& gridSet,
                            int*                  atomStart,
diff --git a/src/gromacs/nbnxm/clusterdistancekerneltype.h b/src/gromacs/nbnxm/clusterdistancekerneltype.h

index 43ce747dade7cdc2bd50b74770505ac938398cbd..c7f680ab0c76f32b82ba1c51c5c9af0bb6a77388 100644 (file)
--- a/src/gromacs/nbnxm/clusterdistancekerneltype.h
+++ b/src/gromacs/nbnxm/clusterdistancekerneltype.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -45,10 +45,10 @@
  #ifndef GMX_NBNXM_CLUSTERDISTANCEKERNELTYPE_H
  #define GMX_NBNXM_CLUSTERDISTANCEKERNELTYPE_H
  
+#include "gromacs/nbnxm/atomdata.h"
  #include "gromacs/simd/simd.h"
  #include "gromacs/utility/gmxassert.h"
  
-#include "atomdata.h"
  #include "pairlistparams.h"
  
  //! The types of kernel for calculating the distance between pairs of atom clusters
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu

index 11a67cdae4f0afce3270bef6ad07ed85e8ab47a4..57504d06bb15e130d71eaf81a953d5b87af2996c 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
@@ -363,7 +363,7 @@ static inline int calc_shmem_required_nonbonded(const int               num_thre
   *  the local, this function records the event if called with the local stream as
   *  argument and inserts in the GPU stream a wait on the event on the nonlocal.
   */
-void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_cuda_t* nb, const InteractionLocality interactionLocality)
+void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t* nb, const InteractionLocality interactionLocality)
  {
      cudaStream_t stream = nb->stream[interactionLocality];
  
@@ -389,7 +389,7 @@ void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_cuda_t* nb, const Interact
  }
  
  /*! \brief Launch asynchronously the xq buffer host to device copy. */
-void gpu_copy_xq_to_gpu(gmx_nbnxn_cuda_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
+void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
  {
      GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
  
@@ -477,7 +477,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_cuda_t* nb, const nbnxn_atomdata_t* nbatom, co
     the local x+q H2D (and all preceding) tasks are complete and synchronize
     with this event in the non-local stream before launching the non-bonded kernel.
   */
-void gpu_launch_kernel(gmx_nbnxn_cuda_t* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
+void gpu_launch_kernel(gmx_nbnxm_gpu_t* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc)
  {
      cu_atomdata_t* adat   = nb->atdat;
      cu_nbparam_t*  nbp    = nb->nbparam;
@@ -589,7 +589,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
      return shmem;
  }
  
-void gpu_launch_kernel_pruneonly(gmx_nbnxn_cuda_t* nb, const InteractionLocality iloc, const int numParts)
+void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc, const int numParts)
  {
      cu_atomdata_t* adat   = nb->atdat;
      cu_nbparam_t*  nbp    = nb->nbparam;
@@ -713,7 +713,7 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_cuda_t* nb, const InteractionLocality
      }
  }
  
-void gpu_launch_cpyback(gmx_nbnxn_cuda_t*        nb,
+void gpu_launch_cpyback(gmx_nbnxm_gpu_t*         nb,
                          nbnxn_atomdata_t*        nbatom,
                          const gmx::StepWorkload& stepWork,
                          const AtomLocality       atomLocality)
@@ -817,7 +817,7 @@ void cuda_set_cacheconfig()
  /* X buffer operations on GPU: performs conversion from rvec to nb format. */
  void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
                             bool                      setFillerCoords,
-                           gmx_nbnxn_gpu_t*          nb,
+                           gmx_nbnxm_gpu_t*          nb,
                             DeviceBuffer<float>       d_x,
                             GpuEventSynchronizer*     xReadyOnDevice,
                             const Nbnxm::AtomLocality locality,
@@ -884,7 +884,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
   */
  void nbnxn_gpu_add_nbat_f_to_f(const AtomLocality                         atomLocality,
                                 DeviceBuffer<float>                        totalForcesDevice,
-                               gmx_nbnxn_gpu_t*                           nb,
+                               gmx_nbnxm_gpu_t*                           nb,
                                 void*                                      pmeForcesDevice,
                                 gmx::ArrayRef<GpuEventSynchronizer* const> dependencyList,
                                 int                                        atomStart,
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu

index 5aea84472677714a129b7aa27425d184b10c0ae0..2714dfee8d7dec879c6f4b5ed8cc47069a29e859 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
@@ -45,7 +45,7 @@
  #include <stdio.h>
  #include <stdlib.h>
  
-// TODO We would like to move this down, but the way gmx_nbnxn_gpu_t
+// TODO We would like to move this down, but the way gmx_nbnxm_gpu_t
  //      is currently declared means this has to be before gpu_types.h
  #include "nbnxm_cuda_types.h"
  
@@ -89,7 +89,7 @@ namespace Nbnxm
  static unsigned int gpu_min_ci_balanced_factor = 44;
  
  /* Fw. decl. */
-static void nbnxn_cuda_clear_e_fshift(gmx_nbnxn_cuda_t* nb);
+static void nbnxn_cuda_clear_e_fshift(gmx_nbnxm_gpu_t* nb);
  
  /* Fw. decl, */
  static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam);
@@ -400,7 +400,7 @@ static void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
  }
  
  /*! Initializes simulation constant data. */
-static void cuda_init_const(gmx_nbnxn_cuda_t*               nb,
+static void cuda_init_const(gmx_nbnxm_gpu_t*                nb,
                              const interaction_const_t*      ic,
                              const PairlistParams&           listParams,
                              const nbnxn_atomdata_t::Params& nbatParams)
@@ -412,16 +412,16 @@ static void cuda_init_const(gmx_nbnxn_cuda_t*               nb,
      nbnxn_cuda_clear_e_fshift(nb);
  }
  
-gmx_nbnxn_cuda_t* gpu_init(const gmx_device_info_t*   deviceInfo,
-                           const interaction_const_t* ic,
-                           const PairlistParams&      listParams,
-                           const nbnxn_atomdata_t*    nbat,
-                           int /*rank*/,
-                           gmx_bool bLocalAndNonlocal)
+gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t*   deviceInfo,
+                          const interaction_const_t* ic,
+                          const PairlistParams&      listParams,
+                          const nbnxn_atomdata_t*    nbat,
+                          int /*rank*/,
+                          gmx_bool bLocalAndNonlocal)
  {
      cudaError_t stat;
  
-    gmx_nbnxn_cuda_t* nb;
+    gmx_nbnxm_gpu_t* nb;
      snew(nb, 1);
      snew(nb->atdat, 1);
      snew(nb->nbparam, 1);
@@ -509,7 +509,7 @@ gmx_nbnxn_cuda_t* gpu_init(const gmx_device_info_t*   deviceInfo,
      return nb;
  }
  
-void gpu_init_pairlist(gmx_nbnxn_cuda_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
+void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
  {
      char         sbuf[STRLEN];
      bool         bDoTime = (nb->bDoTime && !h_plist->sci.empty());
@@ -565,7 +565,7 @@ void gpu_init_pairlist(gmx_nbnxn_cuda_t* nb, const NbnxnPairlistGpu* h_plist, co
      d_plist->haveFreshList = true;
  }
  
-void gpu_upload_shiftvec(gmx_nbnxn_cuda_t* nb, const nbnxn_atomdata_t* nbatom)
+void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
  {
      cu_atomdata_t* adat = nb->atdat;
      cudaStream_t   ls   = nb->stream[InteractionLocality::Local];
@@ -579,7 +579,7 @@ void gpu_upload_shiftvec(gmx_nbnxn_cuda_t* nb, const nbnxn_atomdata_t* nbatom)
  }
  
  /*! Clears the first natoms_clear elements of the GPU nonbonded force output array. */
-static void nbnxn_cuda_clear_f(gmx_nbnxn_cuda_t* nb, int natoms_clear)
+static void nbnxn_cuda_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
  {
      cudaError_t    stat;
      cu_atomdata_t* adat = nb->atdat;
@@ -590,7 +590,7 @@ static void nbnxn_cuda_clear_f(gmx_nbnxn_cuda_t* nb, int natoms_clear)
  }
  
  /*! Clears nonbonded shift force output array and energy outputs on the GPU. */
-static void nbnxn_cuda_clear_e_fshift(gmx_nbnxn_cuda_t* nb)
+static void nbnxn_cuda_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
  {
      cudaError_t    stat;
      cu_atomdata_t* adat = nb->atdat;
@@ -604,7 +604,7 @@ static void nbnxn_cuda_clear_e_fshift(gmx_nbnxn_cuda_t* nb)
      CU_RET_ERR(stat, "cudaMemsetAsync on e_el falied");
  }
  
-void gpu_clear_outputs(gmx_nbnxn_cuda_t* nb, bool computeVirial)
+void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
  {
      nbnxn_cuda_clear_f(nb, nb->atdat->natoms);
      /* clear shift force array and energies if the outputs were
@@ -615,7 +615,7 @@ void gpu_clear_outputs(gmx_nbnxn_cuda_t* nb, bool computeVirial)
      }
  }
  
-void gpu_init_atomdata(gmx_nbnxn_cuda_t* nb, const nbnxn_atomdata_t* nbat)
+void gpu_init_atomdata(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbat)
  {
      cudaError_t    stat;
      int            nalloc, natoms;
@@ -702,7 +702,7 @@ static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t* nbparam)
      }
  }
  
-void gpu_free(gmx_nbnxn_cuda_t* nb)
+void gpu_free(gmx_nbnxm_gpu_t* nb)
  {
      cudaError_t    stat;
      cu_atomdata_t* atdat;
@@ -798,7 +798,7 @@ void gpu_free(gmx_nbnxn_cuda_t* nb)
  }
  
  //! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxn_cuda_t* nb)
+gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t* nb)
  {
      return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
  }
@@ -811,38 +811,38 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv)
      }
  }
  
-int gpu_min_ci_balanced(gmx_nbnxn_cuda_t* nb)
+int gpu_min_ci_balanced(gmx_nbnxm_gpu_t* nb)
  {
      return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->prop.multiProcessorCount : 0;
  }
  
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_cuda_t* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t* nb)
  {
      return ((nb->nbparam->eeltype == eelCuEWALD_ANA) || (nb->nbparam->eeltype == eelCuEWALD_ANA_TWIN));
  }
  
-void* gpu_get_command_stream(gmx_nbnxn_gpu_t* nb, const InteractionLocality iloc)
+void* gpu_get_command_stream(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc)
  {
      assert(nb);
  
      return static_cast<void*>(&nb->stream[iloc]);
  }
  
-void* gpu_get_xq(gmx_nbnxn_gpu_t* nb)
+void* gpu_get_xq(gmx_nbnxm_gpu_t* nb)
  {
      assert(nb);
  
      return static_cast<void*>(nb->atdat->xq);
  }
  
-void* gpu_get_f(gmx_nbnxn_gpu_t* nb)
+void* gpu_get_f(gmx_nbnxm_gpu_t* nb)
  {
      assert(nb);
  
      return static_cast<void*>(nb->atdat->f);
  }
  
-rvec* gpu_get_fshift(gmx_nbnxn_gpu_t* nb)
+rvec* gpu_get_fshift(gmx_nbnxm_gpu_t* nb)
  {
      assert(nb);
  
@@ -851,7 +851,7 @@ rvec* gpu_get_fshift(gmx_nbnxn_gpu_t* nb)
  
  /* Initialization for X buffer operations on GPU. */
  /* TODO  Remove explicit pinning from host arrays from here and manage in a more natural way*/
-void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, gmx_nbnxn_gpu_t* gpu_nbv)
+void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, gmx_nbnxm_gpu_t* gpu_nbv)
  {
      cudaStream_t stream        = gpu_nbv->stream[InteractionLocality::Local];
      bool         bDoTime       = gpu_nbv->bDoTime;
@@ -937,7 +937,7 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet& gridSet, gmx_nbnxn_gpu_t*
  
  /* Initialization for F buffer operations on GPU. */
  void nbnxn_gpu_init_add_nbat_f_to_f(const int*                  cell,
-                                    gmx_nbnxn_gpu_t*            gpu_nbv,
+                                    gmx_nbnxm_gpu_t*            gpu_nbv,
                                      int                         natoms_total,
                                      GpuEventSynchronizer* const localReductionDone)
  {
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h

index d65d308c48f612bf2ad75d8c078ed39d5a51abe1..797a0b4bc4778407cefce03f05b422ae2b8bfcdf 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h
@@ -3,7 +3,7 @@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2012, The GROMACS development team.
- * Copyright (c) 2013-2019, by the GROMACS development team, led by
+ * Copyright (c) 2013-2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -223,84 +223,94 @@ typedef struct Nbnxm::gpu_timers_t cu_timers_t;
  
  class GpuEventSynchronizer;
  
-/** \internal
+/*! \internal
   * \brief Main data structure for CUDA nonbonded force calculations.
   */
-struct gmx_nbnxn_cuda_t
+struct gmx_nbnxm_gpu_t
  {
-    //! CUDA device information
+    /*! \brief CUDA device information */
      const gmx_device_info_t* dev_info;
-    //! true if doing both local/non-local NB work on GPU
+    /*! \brief true if doing both local/non-local NB work on GPU */
      bool bUseTwoStreams;
-    //! atom data
+    /*! \brief atom data */
      cu_atomdata_t* atdat;
-    //! f buf ops cell index mapping
+    /*! \brief f buf ops cell index mapping */
      int* cell;
-    //! number of indices in cell buffer
+    /*! \brief number of indices in cell buffer */
      int ncell;
-    //! number of indices allocated in cell buffer
+    /*! \brief number of indices allocated in cell buffer */
      int ncell_alloc;
-    //! array of atom indices
+    /*! \brief array of atom indices */
      int* atomIndices;
-    //! size of atom indices
+    /*! \brief size of atom indices */
      int atomIndicesSize;
-    //! size of atom indices allocated in device buffer
+    /*! \brief size of atom indices allocated in device buffer */
      int atomIndicesSize_alloc;
-    //! x buf ops num of atoms
+    /*! \brief x buf ops num of atoms */
      int* cxy_na;
-    //! number of elements in cxy_na
+    /*! \brief number of elements in cxy_na */
      int ncxy_na;
-    //! number of elements allocated allocated in device buffer
+    /*! \brief number of elements allocated in device buffer */
      int ncxy_na_alloc;
-    //! x buf ops cell index mapping
+    /*! \brief x buf ops cell index mapping */
      int* cxy_ind;
-    //! number of elements in cxy_ind
+    /*! \brief number of elements in cxy_ind */
      int ncxy_ind;
-    //! number of elements allocated allocated in device buffer
+    /*! \brief number of elements allocated in device buffer */
      int ncxy_ind_alloc;
-    //! parameters required for the non-bonded calc.
+    /*! \brief parameters required for the non-bonded calc. */
      cu_nbparam_t* nbparam;
-    //! pair-list data structures (local and non-local)
+    /*! \brief pair-list data structures (local and non-local) */
      gmx::EnumerationArray<Nbnxm::InteractionLocality, cu_plist_t*> plist;
-    //! staging area where fshift/energies get downloaded
+    /*! \brief staging area where fshift/energies get downloaded */
      nb_staging_t nbst;
-    //! local and non-local GPU streams
+    /*! \brief local and non-local GPU streams */
      gmx::EnumerationArray<Nbnxm::InteractionLocality, cudaStream_t> stream;
  
-    /** events used for synchronization */
-    cudaEvent_t nonlocal_done; /**< event triggered when the non-local non-bonded kernel
-                                  is done (and the local transfer can proceed)           */
-    cudaEvent_t misc_ops_and_local_H2D_done; /**< event triggered when the tasks issued in
-                                                the local stream that need to precede the
-                                                non-local force or buffer operation calculations are
-                                                done (e.g. f buffer 0-ing, local x/q H2D, buffer op
-                                                initialization in local stream that is required also
-                                                by nonlocal stream ) */
-
-    //! True if there has been local/nonlocal GPU work, either bonded or nonbonded, scheduled
-    //  to be executed in the current domain. As long as bonded work is not split up into
-    //  local/nonlocal, if there is bonded GPU work, both flags will be true.
+    /*! \brief Events used for synchronization */
+    /*! \{ */
+    /*! \brief Event triggered when the non-local non-bonded
+     * kernel is done (and the local transfer can proceed) */
+    cudaEvent_t nonlocal_done;
+    /*! \brief Event triggered when the tasks issued in the local
+     * stream that need to precede the non-local force or buffer
+     * operation calculations are done (e.g. f buffer 0-ing, local
+     * x/q H2D, buffer op initialization in local stream that is
+     * required also by nonlocal stream) */
+    cudaEvent_t misc_ops_and_local_H2D_done;
+    /*! \} */
+
+    /*! \brief True if there is work for the current domain in the
+     * respective locality.
+     *
+     * This includes local/nonlocal GPU work, either bonded or
+     * nonbonded, scheduled to be executed in the current
+     * domain. As long as bonded work is not split up into
+     * local/nonlocal, if there is bonded GPU work, both flags
+     * will be true. */
      gmx::EnumerationArray<Nbnxm::InteractionLocality, bool> haveWork;
  
-    /*! \brief Pointer to event synchronizer triggered when the local GPU buffer ops / reduction is complete
+    /*! \brief Pointer to event synchronizer triggered when the local
+     * GPU buffer ops / reduction is complete
       *
-     * \note That the synchronizer is managed outside of this module in StatePropagatorDataGpu.
+     * \note That the synchronizer is managed outside of this module
+     * in StatePropagatorDataGpu.
       */
      GpuEventSynchronizer* localFReductionDone;
  
-    GpuEventSynchronizer* xNonLocalCopyD2HDone; /**< event triggered when
-                                                   non-local coordinate buffer has been
-                                                   copied from device to host*/
+    /*! \brief Event triggered when non-local coordinate buffer
+     * has been copied from device to host. */
+    GpuEventSynchronizer* xNonLocalCopyD2HDone;
  
      /* NOTE: With current CUDA versions (<=5.0) timing doesn't work with multiple
       * concurrent streams, so we won't time if both l/nl work is done on GPUs.
       * Timer init/uninit is still done even with timing off so only the condition
       * setting bDoTime needs to be change if this CUDA "feature" gets fixed. */
-    //! True if event-based timing is enabled.
+    /*! \brief True if event-based timing is enabled. */
      bool bDoTime;
-    //! CUDA event-based timers.
+    /*! \brief CUDA event-based timers. */
      cu_timers_t* timers;
-    //! Timing data. TODO: deprecate this and query timers for accumulated data instead
+    /*! \brief Timing data. TODO: deprecate this and query timers for accumulated data instead */
      gmx_wallclock_gpu_nbnxn_t* timings;
  };
  
diff --git a/src/gromacs/nbnxm/gpu_common.h b/src/gromacs/nbnxm/gpu_common.h

index d209f0319abcd55f4dcd3c66df03d0e0f8a9d25d..a8369ce2d29aaacb775feb9c9811ce63a0cff421 100644 (file)
--- a/src/gromacs/nbnxm/gpu_common.h
+++ b/src/gromacs/nbnxm/gpu_common.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -124,7 +124,7 @@ static inline InteractionLocality gpuAtomToInteractionLocality(const AtomLocalit
  
  
  //NOLINTNEXTLINE(misc-definitions-in-headers)
-void setupGpuShortRangeWork(gmx_nbnxn_gpu_t*               nb,
+void setupGpuShortRangeWork(gmx_nbnxm_gpu_t*               nb,
                              const gmx::GpuBonded*          gpuBonded,
                              const gmx::InteractionLocality iLocality)
  {
@@ -146,13 +146,13 @@ void setupGpuShortRangeWork(gmx_nbnxn_gpu_t*               nb,
   * \param[inout]  nb        Pointer to the nonbonded GPU data structure
   * \param[in]     iLocality Interaction locality identifier
   */
-static bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t& nb, const gmx::InteractionLocality iLocality)
+static bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t& nb, const gmx::InteractionLocality iLocality)
  {
      return nb.haveWork[iLocality];
  }
  
  //NOLINTNEXTLINE(misc-definitions-in-headers)
-bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t* nb, const gmx::AtomLocality aLocality)
+bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t* nb, const gmx::AtomLocality aLocality)
  {
      GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
  
@@ -362,7 +362,7 @@ static inline void gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t* timings,
   * \todo Move into shared source file with gmx_compile_cpp_as_cuda
   */
  //NOLINTNEXTLINE(misc-definitions-in-headers)
-bool gpu_try_finish_task(gmx_nbnxn_gpu_t*         nb,
+bool gpu_try_finish_task(gmx_nbnxm_gpu_t*         nb,
                           const gmx::StepWorkload& stepWork,
                           const AtomLocality       aloc,
                           real*                    e_lj,
@@ -458,7 +458,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t*         nb,
   * \return            The number of cycles the gpu wait took
   */
  //NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file
-float gpu_wait_finish_task(gmx_nbnxn_gpu_t*         nb,
+float gpu_wait_finish_task(gmx_nbnxm_gpu_t*         nb,
                             const gmx::StepWorkload& stepWork,
                             AtomLocality             aloc,
                             real*                    e_lj,
diff --git a/src/gromacs/nbnxm/gpu_common_utils.h b/src/gromacs/nbnxm/gpu_common_utils.h

index 4c3333d82a622ba09883fba637d5fb63c54b5689..176ab8f0457905e8eee5196fbad7621a5e84ba63 100644 (file)
--- a/src/gromacs/nbnxm/gpu_common_utils.h
+++ b/src/gromacs/nbnxm/gpu_common_utils.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2017,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -64,7 +64,7 @@ namespace Nbnxm
   * local part of the force array also depends on the non-local kernel.
   * The skip of the local kernel is taken care of separately.
   */
-static inline bool canSkipNonbondedWork(const gmx_nbnxn_gpu_t& nb, InteractionLocality iloc)
+static inline bool canSkipNonbondedWork(const gmx_nbnxm_gpu_t& nb, InteractionLocality iloc)
  {
      assert(nb.plist[iloc]);
      return (iloc == InteractionLocality::NonLocal && nb.plist[iloc]->nsci == 0);
diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h

index c93c536becb1f31795b498fb283e984f6bafa4ba..2f504e91f8d261abcd28e6c247c99301b05c2b68 100644 (file)
--- a/src/gromacs/nbnxm/gpu_data_mgmt.h
+++ b/src/gromacs/nbnxm/gpu_data_mgmt.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -50,21 +50,20 @@
  #include "gromacs/mdtypes/interaction_const.h"
  #include "gromacs/mdtypes/locality.h"
  
-#include "gpu_types.h"
-
-struct NbnxnPairlistGpu;
-struct nbnxn_atomdata_t;
-struct PairlistParams;
-struct gmx_wallclock_gpu_nbnxn_t;
+struct gmx_nbnxm_gpu_t;
  struct gmx_gpu_info_t;
  struct gmx_device_info_t;
+struct gmx_wallclock_gpu_nbnxn_t;
+struct nbnxn_atomdata_t;
+struct NbnxnPairlistGpu;
+struct PairlistParams;
  
  namespace Nbnxm
  {
  
  /** Initializes the data structures related to GPU nonbonded calculations. */
  GPU_FUNC_QUALIFIER
-gmx_nbnxn_gpu_t* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
+gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
                            const interaction_const_t gmx_unused* ic,
                            const PairlistParams gmx_unused& listParams,
                            const nbnxn_atomdata_t gmx_unused* nbat,
@@ -74,13 +73,13 @@ gmx_nbnxn_gpu_t* gpu_init(const gmx_device_info_t gmx_unused* deviceInfo,
  
  /** Initializes pair-list data for GPU, called at every pair search step. */
  GPU_FUNC_QUALIFIER
-void gpu_init_pairlist(gmx_nbnxn_gpu_t gmx_unused*   nb,
+void gpu_init_pairlist(gmx_nbnxm_gpu_t gmx_unused*   nb,
                         const struct NbnxnPairlistGpu gmx_unused* h_nblist,
                         gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
  
  /** Initializes atom-data on the GPU, called at every pair search step. */
  GPU_FUNC_QUALIFIER
-void gpu_init_atomdata(gmx_nbnxn_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbat) GPU_FUNC_TERM;
+void gpu_init_atomdata(gmx_nbnxm_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbat) GPU_FUNC_TERM;
  
  /*! \brief Re-generate the GPU Ewald force table, resets rlist, and update the
   *  electrostatic type switching to twin cut-off (or back) if needed.
@@ -91,19 +90,19 @@ void gpu_pme_loadbal_update_param(const struct nonbonded_verlet_t gmx_unused* nb
  
  /** Uploads shift vector to the GPU if the box is dynamic (otherwise just returns). */
  GPU_FUNC_QUALIFIER
-void gpu_upload_shiftvec(gmx_nbnxn_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbatom) GPU_FUNC_TERM;
+void gpu_upload_shiftvec(gmx_nbnxm_gpu_t gmx_unused* nb, const nbnxn_atomdata_t gmx_unused* nbatom) GPU_FUNC_TERM;
  
  /** Clears GPU outputs: nonbonded force, shift force and energy. */
  GPU_FUNC_QUALIFIER
-void gpu_clear_outputs(gmx_nbnxn_gpu_t gmx_unused* nb, bool gmx_unused computeVirial) GPU_FUNC_TERM;
+void gpu_clear_outputs(gmx_nbnxm_gpu_t gmx_unused* nb, bool gmx_unused computeVirial) GPU_FUNC_TERM;
  
  /** Frees all GPU resources used for the nonbonded calculations. */
  GPU_FUNC_QUALIFIER
-void gpu_free(gmx_nbnxn_gpu_t gmx_unused* nb) GPU_FUNC_TERM;
+void gpu_free(gmx_nbnxm_gpu_t gmx_unused* nb) GPU_FUNC_TERM;
  
  /** Returns the GPU timings structure or NULL if GPU is not used or timing is off. */
  GPU_FUNC_QUALIFIER
-struct gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxn_gpu_t gmx_unused* nb)
+struct gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t gmx_unused* nb)
          GPU_FUNC_TERM_WITH_RETURN(nullptr);
  
  /** Resets nonbonded GPU timings. */
@@ -113,37 +112,37 @@ void gpu_reset_timings(struct nonbonded_verlet_t gmx_unused* nbv) GPU_FUNC_TERM;
  /** Calculates the minimum size of proximity lists to improve SM load balance
   *  with GPU non-bonded kernels. */
  GPU_FUNC_QUALIFIER
-int gpu_min_ci_balanced(gmx_nbnxn_gpu_t gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
+int gpu_min_ci_balanced(gmx_nbnxm_gpu_t gmx_unused* nb) GPU_FUNC_TERM_WITH_RETURN(-1);
  
  /** Returns if analytical Ewald GPU kernels are used. */
  GPU_FUNC_QUALIFIER
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_gpu_t gmx_unused* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t gmx_unused* nb)
          GPU_FUNC_TERM_WITH_RETURN(FALSE);
  
  /** Returns an opaque pointer to the GPU command stream
   *  Note: CUDA only.
   */
  CUDA_FUNC_QUALIFIER
-void* gpu_get_command_stream(gmx_nbnxn_gpu_t gmx_unused* nb, gmx::InteractionLocality gmx_unused iloc)
+void* gpu_get_command_stream(gmx_nbnxm_gpu_t gmx_unused* nb, gmx::InteractionLocality gmx_unused iloc)
          CUDA_FUNC_TERM_WITH_RETURN(nullptr);
  
  /** Returns an opaque pointer to the GPU coordinate+charge array
   *  Note: CUDA only.
   */
  CUDA_FUNC_QUALIFIER
-void* gpu_get_xq(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+void* gpu_get_xq(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
  
  /** Returns an opaque pointer to the GPU force array
   *  Note: CUDA only.
   */
  CUDA_FUNC_QUALIFIER
-void* gpu_get_f(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+void* gpu_get_f(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
  
  /** Returns an opaque pointer to the GPU shift force array
   *  Note: CUDA only.
   */
  CUDA_FUNC_QUALIFIER
-rvec* gpu_get_fshift(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+rvec* gpu_get_fshift(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
  
  } // namespace Nbnxm
  
diff --git a/src/gromacs/nbnxm/gpu_jit_support.h b/src/gromacs/nbnxm/gpu_jit_support.h

index b784d37ce8fa7e9940f01f91a244dbbd2dab2efd..3a5928d2bb687c0d7b0e5f0965d49d6db893fac0 100644 (file)
--- a/src/gromacs/nbnxm/gpu_jit_support.h
+++ b/src/gromacs/nbnxm/gpu_jit_support.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2014,2015,2019, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -46,9 +46,9 @@
  
  #include "gromacs/utility/basedefinitions.h"
  
-#include "gpu_types.h"
+struct gmx_nbnxm_gpu_t;
  
  /*! \brief Handles any JIT compilation of nbnxn kernels for the selected device */
-OPENCL_FUNC_QUALIFIER void nbnxn_gpu_compile_kernels(gmx_nbnxn_gpu_t gmx_unused* nb) OPENCL_FUNC_TERM;
+OPENCL_FUNC_QUALIFIER void nbnxn_gpu_compile_kernels(gmx_nbnxm_gpu_t gmx_unused* nb) OPENCL_FUNC_TERM;
  
  #endif
diff --git a/src/gromacs/nbnxm/gpu_types.h b/src/gromacs/nbnxm/gpu_types.h

deleted file mode 100644 (file)

index 43039f7..0000000
--- a/src/gromacs/nbnxm/gpu_types.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2018 by the GROMACS development team.
- * Copyright (c) 2019,2020, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \libinternal \file
- * \brief Sets gmx_nbnxn_gpu_t to the correct type depending on the build
- *
- * \ingroup module_nbnxm
- */
-
-#ifndef GMX_NBNXN_GPU_TYPES_H
-#define GMX_NBNXN_GPU_TYPES_H
-
-#include "config.h"
-
-#ifndef DOXYGEN
-
-#    if GMX_GPU == GMX_GPU_OPENCL
-struct gmx_nbnxn_ocl_t;
-using gmx_nbnxn_gpu_t = gmx_nbnxn_ocl_t;
-#    endif
-
-#    if GMX_GPU == GMX_GPU_CUDA
-struct gmx_nbnxn_cuda_t;
-using gmx_nbnxn_gpu_t = gmx_nbnxn_cuda_t;
-#    endif
-
-#    if GMX_GPU == GMX_GPU_NONE
-using gmx_nbnxn_gpu_t = int;
-#    endif
-
-#endif // !DOXYGEN
-
-#endif
diff --git a/src/gromacs/nbnxm/grid.cpp b/src/gromacs/nbnxm/grid.cpp

index 060722c4e3a942a80a00cc6dd36089da8d4b9520..d852ec4f0899db0a550287aaa9419b28ea9a9fce 100644 (file)
--- a/src/gromacs/nbnxm/grid.cpp
+++ b/src/gromacs/nbnxm/grid.cpp
@@ -57,10 +57,10 @@
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
  #include "gromacs/mdlib/updategroupscog.h"
  #include "gromacs/mdtypes/forcerec.h" // only for GET_CGINFO_*
+#include "gromacs/nbnxm/atomdata.h"
  #include "gromacs/simd/simd.h"
  #include "gromacs/simd/vector_operations.h"
  
-#include "atomdata.h"
  #include "boundingboxes.h"
  #include "gridsetdata.h"
  #include "nbnxm_geometry.h"
diff --git a/src/gromacs/nbnxm/gridset.cpp b/src/gromacs/nbnxm/gridset.cpp

index 1adcff76a67218b481c024f88107ed8ee352144c..a2e83ddc6a5e72b94159af34902013bc5fc37e4e 100644 (file)
--- a/src/gromacs/nbnxm/gridset.cpp
+++ b/src/gromacs/nbnxm/gridset.cpp
@@ -48,10 +48,9 @@
  
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
  #include "gromacs/mdlib/updategroupscog.h"
+#include "gromacs/nbnxm/atomdata.h"
  #include "gromacs/utility/fatalerror.h"
  
-#include "atomdata.h"
-
  namespace Nbnxm
  {
  
diff --git a/src/gromacs/nbnxm/gridsetdata.h b/src/gromacs/nbnxm/gridsetdata.h

index ed8fc182bdbaf2c7cd9cac54ddeca779b9bbc21e..9d7301d34522a35a8d61f8ed27da6177fe7d91cd 100644 (file)
--- a/src/gromacs/nbnxm/gridsetdata.h
+++ b/src/gromacs/nbnxm/gridsetdata.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -49,6 +49,7 @@
  
  #include <vector>
  
+#include "gromacs/gpu_utils/hostallocator.h"
  
  namespace Nbnxm
  {
diff --git a/src/gromacs/nbnxm/kernel_common.h b/src/gromacs/nbnxm/kernel_common.h

index ecc6abf9d0b1701d5b6f4a8a8a40eeb64392916d..f93e03521d35b1f98080f028ec5b0b7ee9c08a50 100644 (file)
--- a/src/gromacs/nbnxm/kernel_common.h
+++ b/src/gromacs/nbnxm/kernel_common.h
@@ -48,9 +48,9 @@
  
  #include "gromacs/math/vectypes.h"
  /* nbnxn_atomdata_t and nbnxn_pairlist_t could be forward declared, but that requires modifications in all SIMD kernel files */
+#include "gromacs/nbnxm/atomdata.h"
  #include "gromacs/utility/real.h"
  
-#include "atomdata.h"
  #include "pairlist.h"
  
  struct interaction_const_t;
diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp

index 165dd2a7247dcde0c15bdbabbd731cc54744f45b..d3513966b4cf93d90cf59c892981d30c9b36f06a 100644 (file)
--- a/src/gromacs/nbnxm/kerneldispatch.cpp
+++ b/src/gromacs/nbnxm/kerneldispatch.cpp
@@ -59,6 +59,7 @@
  #include "gromacs/utility/real.h"
  
  #include "kernel_common.h"
+#include "nbnxm_gpu.h"
  #include "nbnxm_simd.h"
  #include "pairlistset.h"
  #include "pairlistsets.h"
diff --git a/src/gromacs/nbnxm/nbnxm.cpp b/src/gromacs/nbnxm/nbnxm.cpp

index 1531f02816f73744b0cf3d687040079a296ab983..07b24fc923e0a94de18b885412f52957879677e0 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm.cpp
+++ b/src/gromacs/nbnxm/nbnxm.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -46,9 +46,10 @@
  #include "nbnxm.h"
  
  #include "gromacs/domdec/domdec_struct.h"
+#include "gromacs/nbnxm/atomdata.h"
  #include "gromacs/timing/wallcycle.h"
  
-#include "atomdata.h"
+#include "nbnxm_gpu.h"
  #include "pairlistsets.h"
  #include "pairsearch.h"
  
@@ -162,7 +163,7 @@ void nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const gmx::AtomLocality  local
  
      /* Skip the reduction if there was no short-range GPU work to do
       * (either NB or both NB and bonded work). */
-    if (!pairlistIsSimple() && !haveGpuShortRangeWork(locality))
+    if (!pairlistIsSimple() && !Nbnxm::haveGpuShortRangeWork(gpu_nbv, locality))
      {
          return;
      }
@@ -190,7 +191,7 @@ void nonbonded_verlet_t::atomdata_add_nbat_f_to_f_gpu(const gmx::AtomLocality lo
  
      /* Skip the reduction if there was no short-range GPU work to do
       * (either NB or both NB and bonded work). */
-    if (!pairlistIsSimple() && !haveGpuShortRangeWork(locality))
+    if (!pairlistIsSimple() && !Nbnxm::haveGpuShortRangeWork(gpu_nbv, locality))
      {
          return;
      }
@@ -235,6 +236,15 @@ void nonbonded_verlet_t::changePairlistRadii(real rlistOuter, real rlistInner)
      pairlistSets_->changePairlistRadii(rlistOuter, rlistInner);
  }
  
+void nonbonded_verlet_t::setupGpuShortRangeWork(const gmx::GpuBonded*          gpuBonded,
+                                                const gmx::InteractionLocality iLocality)
+{
+    if (useGpu() && !emulateGpu())
+    {
+        Nbnxm::setupGpuShortRangeWork(gpu_nbv, gpuBonded, iLocality);
+    }
+}
+
  void nonbonded_verlet_t::atomdata_init_copy_x_to_nbat_x_gpu()
  {
      Nbnxm::nbnxn_gpu_init_x_to_nbat_x(pairSearch_->gridSet(), gpu_nbv);
diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h

index 1a27e2e160dd06cbf290570cb0898cf7cea373dc..c820e5772696677ae781dc1a6ed0c0ac9d200d83 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm.h
+++ b/src/gromacs/nbnxm/nbnxm.h
@@ -118,19 +118,17 @@
  #include "gromacs/mdtypes/locality.h"
  #include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/enumerationhelpers.h"
-#include "gromacs/utility/range.h"
  #include "gromacs/utility/real.h"
  
-// TODO: Remove this include
-#include "nbnxm_gpu.h"
-
  struct gmx_device_info_t;
  struct gmx_domdec_zones_t;
  struct gmx_enerdata_t;
  struct gmx_hw_info_t;
  struct gmx_mtop_t;
+struct gmx_nbnxm_gpu_t;
  struct gmx_wallcycle;
  struct interaction_const_t;
+struct nbnxn_atomdata_t;
  struct nonbonded_verlet_t;
  class PairSearch;
  class PairlistSets;
@@ -153,9 +151,13 @@ class GpuEventSynchronizer;
  namespace gmx
  {
  class ForceWithShiftForces;
+class GpuBonded;
  template<typename>
  class ListOfLists;
  class MDLogger;
+template<typename>
+class Range;
+class StepWorkload;
  class UpdateGroupsCog;
  } // namespace gmx
  
@@ -223,7 +225,7 @@ public:
                         std::unique_ptr<PairSearch>       pairSearch,
                         std::unique_ptr<nbnxn_atomdata_t> nbat,
                         const Nbnxm::KernelSetup&         kernelSetup,
-                       gmx_nbnxn_gpu_t*                  gpu_nbv,
+                       gmx_nbnxm_gpu_t*                  gpu_nbv,
                         gmx_wallcycle*                    wcycle);
  
      ~nonbonded_verlet_t();
@@ -382,19 +384,7 @@ public:
      void changePairlistRadii(real rlistOuter, real rlistInner);
  
      //! Set up internal flags that indicate what type of short-range work there is.
-    void setupGpuShortRangeWork(const gmx::GpuBonded* gpuBonded, const gmx::InteractionLocality iLocality)
-    {
-        if (useGpu() && !emulateGpu())
-        {
-            Nbnxm::setupGpuShortRangeWork(gpu_nbv, gpuBonded, iLocality);
-        }
-    }
-
-    //! Returns true if there is GPU short-range work for the given atom locality.
-    bool haveGpuShortRangeWork(const gmx::AtomLocality aLocality)
-    {
-        return ((useGpu() && !emulateGpu()) && Nbnxm::haveGpuShortRangeWork(gpu_nbv, aLocality));
-    }
+    void setupGpuShortRangeWork(const gmx::GpuBonded* gpuBonded, gmx::InteractionLocality iLocality);
  
      // TODO: Make all data members private
  public:
@@ -413,7 +403,7 @@ private:
  
  public:
      //! GPU Nbnxm data, only used with a physical GPU (TODO: use unique_ptr)
-    gmx_nbnxn_gpu_t* gpu_nbv;
+    gmx_nbnxm_gpu_t* gpu_nbv;
  };
  
  namespace Nbnxm
diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h

index 035d5f1ae8edf13cfb969920a15bfbc13adc7251..adbbcf7f0caaf3b0a4f8e05c13da8f3bb107164e 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_gpu.h
+++ b/src/gromacs/nbnxm/nbnxm_gpu.h
@@ -47,12 +47,10 @@
  #include "gromacs/gpu_utils/gpu_macros.h"
  #include "gromacs/math/vectypes.h"
  #include "gromacs/mdtypes/locality.h"
+#include "gromacs/nbnxm/atomdata.h"
  #include "gromacs/utility/basedefinitions.h"
  #include "gromacs/utility/real.h"
  
-#include "atomdata.h"
-#include "gpu_types.h"
-
  struct interaction_const_t;
  struct nbnxn_atomdata_t;
  struct gmx_wallcycle;
@@ -80,7 +78,7 @@ class Grid;
   * \param [in]    aloc      Atom locality flag.
   */
  GPU_FUNC_QUALIFIER
-void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused*   nb,
+void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t gmx_unused*   nb,
                          const struct nbnxn_atomdata_t gmx_unused* nbdata,
                          gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
  
@@ -95,7 +93,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_gpu_t gmx_unused*   nb,
   *
   */
  GPU_FUNC_QUALIFIER
-void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused* nb,
+void gpu_launch_kernel(gmx_nbnxm_gpu_t gmx_unused* nb,
                         const gmx::StepWorkload gmx_unused& stepWork,
                         gmx::InteractionLocality gmx_unused iloc) GPU_FUNC_TERM;
  
@@ -135,7 +133,7 @@ void gpu_launch_kernel(gmx_nbnxn_gpu_t gmx_unused* nb,
   * \param [in]    numParts  Number of parts the pair list is split into in the rolling kernel.
   */
  GPU_FUNC_QUALIFIER
-void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused* nb,
+void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t gmx_unused* nb,
                                   gmx::InteractionLocality gmx_unused iloc,
                                   int gmx_unused numParts) GPU_FUNC_TERM;
  
@@ -144,7 +142,7 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t gmx_unused* nb,
   * (and energies/shift forces if required).
   */
  GPU_FUNC_QUALIFIER
-void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused* nb,
+void gpu_launch_cpyback(gmx_nbnxm_gpu_t gmx_unused* nb,
                          nbnxn_atomdata_t gmx_unused* nbatom,
                          const gmx::StepWorkload gmx_unused& stepWork,
                          gmx::AtomLocality gmx_unused aloc) GPU_FUNC_TERM;
@@ -187,7 +185,7 @@ void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused* nb,
   * \returns                   True if the nonbonded tasks associated with \p aloc locality have completed
   */
  GPU_FUNC_QUALIFIER
-bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused* nb,
+bool gpu_try_finish_task(gmx_nbnxm_gpu_t gmx_unused* nb,
                           const gmx::StepWorkload gmx_unused& stepWork,
                           gmx::AtomLocality gmx_unused aloc,
                           real gmx_unused* e_lj,
@@ -211,7 +209,7 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused* nb,
   * \param[out] shiftForces Shift forces buffer to accumulate into
   * \param[out] wcycle         Pointer to wallcycle data structure               */
  GPU_FUNC_QUALIFIER
-float gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused* nb,
+float gpu_wait_finish_task(gmx_nbnxm_gpu_t gmx_unused* nb,
                             const gmx::StepWorkload gmx_unused& stepWork,
                             gmx::AtomLocality gmx_unused aloc,
                             real gmx_unused* e_lj,
@@ -228,7 +226,7 @@ int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t gmx_unused& ic)
   * Called on the NS step and performs (re-)allocations and memory copies. !*/
  CUDA_FUNC_QUALIFIER
  void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet,
-                                gmx_nbnxn_gpu_t gmx_unused* gpu_nbv) CUDA_FUNC_TERM;
+                                gmx_nbnxm_gpu_t gmx_unused* gpu_nbv) CUDA_FUNC_TERM;
  
  /*! \brief X buffer operations on GPU: performs conversion from rvec to nb format.
   *
@@ -244,7 +242,7 @@ void nbnxn_gpu_init_x_to_nbat_x(const Nbnxm::GridSet gmx_unused& gridSet,
  CUDA_FUNC_QUALIFIER
  void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
                             bool gmx_unused setFillerCoords,
-                           gmx_nbnxn_gpu_t gmx_unused* gpu_nbv,
+                           gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
                             DeviceBuffer<float> gmx_unused d_x,
                             GpuEventSynchronizer gmx_unused* xReadyOnDevice,
                             gmx::AtomLocality gmx_unused locality,
@@ -256,7 +254,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid gmx_unused& grid,
   * \param[in] interactionLocality  Local or NonLocal sync point
   */
  CUDA_FUNC_QUALIFIER
-void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_gpu_t gmx_unused* nb,
+void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxm_gpu_t gmx_unused* nb,
                                        gmx::InteractionLocality gmx_unused interactionLocality) CUDA_FUNC_TERM;
  
  /*! \brief Set up internal flags that indicate what type of short-range work there is.
@@ -272,7 +270,7 @@ void nbnxnInsertNonlocalGpuDependency(const gmx_nbnxn_gpu_t gmx_unused* nb,
   * \param[in]     iLocality  Interaction locality identifier
   */
  GPU_FUNC_QUALIFIER
-void setupGpuShortRangeWork(gmx_nbnxn_gpu_t gmx_unused* nb,
+void setupGpuShortRangeWork(gmx_nbnxm_gpu_t gmx_unused* nb,
                              const gmx::GpuBonded gmx_unused* gpuBonded,
                              gmx::InteractionLocality gmx_unused iLocality) GPU_FUNC_TERM;
  
@@ -286,13 +284,13 @@ void setupGpuShortRangeWork(gmx_nbnxn_gpu_t gmx_unused* nb,
   * \param[in]     aLocality Atom locality identifier
   */
  GPU_FUNC_QUALIFIER
-bool haveGpuShortRangeWork(const gmx_nbnxn_gpu_t gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality)
+bool haveGpuShortRangeWork(const gmx_nbnxm_gpu_t gmx_unused* nb, gmx::AtomLocality gmx_unused aLocality)
          GPU_FUNC_TERM_WITH_RETURN(false);
  
  /*! \brief Initialization for F buffer operations on GPU */
  CUDA_FUNC_QUALIFIER
  void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused* cell,
-                                    gmx_nbnxn_gpu_t gmx_unused* gpu_nbv,
+                                    gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
                                      int gmx_unused       natoms_total,
                                      GpuEventSynchronizer gmx_unused* localReductionDone) CUDA_FUNC_TERM;
  
@@ -315,7 +313,7 @@ void nbnxn_gpu_init_add_nbat_f_to_f(const int gmx_unused* cell,
  CUDA_FUNC_QUALIFIER
  void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
                                 DeviceBuffer<float> gmx_unused totalForcesDevice,
-                               gmx_nbnxn_gpu_t gmx_unused* gpu_nbv,
+                               gmx_nbnxm_gpu_t gmx_unused* gpu_nbv,
                                 void gmx_unused*                           pmeForcesDevice,
                                 gmx::ArrayRef<GpuEventSynchronizer* const> gmx_unused dependencyList,
                                 int gmx_unused atomStart,
@@ -327,7 +325,7 @@ void nbnxn_gpu_add_nbat_f_to_f(gmx::AtomLocality gmx_unused atomLocality,
   * \param[in] nb                   The nonbonded data GPU structure
   */
  CUDA_FUNC_QUALIFIER
-void nbnxn_wait_x_on_device(gmx_nbnxn_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
+void nbnxn_wait_x_on_device(gmx_nbnxm_gpu_t gmx_unused* nb) CUDA_FUNC_TERM;
  
  } // namespace Nbnxm
  #endif
diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp

index cb3a2309297efe4fb00406e14d32d9ea4ef706d4..745414b67d5209de070969cad624f556be0e65f4 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_setup.cpp
+++ b/src/gromacs/nbnxm/nbnxm_setup.cpp
@@ -49,6 +49,7 @@
  #include "gromacs/mdtypes/commrec.h"
  #include "gromacs/mdtypes/forcerec.h"
  #include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/nbnxm/atomdata.h"
  #include "gromacs/nbnxm/gpu_data_mgmt.h"
  #include "gromacs/nbnxm/nbnxm.h"
  #include "gromacs/nbnxm/pairlist_tuning.h"
@@ -56,8 +57,6 @@
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/logger.h"
  
-#include "atomdata.h"
-#include "gpu_types.h"
  #include "grid.h"
  #include "nbnxm_geometry.h"
  #include "nbnxm_simd.h"
@@ -321,7 +320,7 @@ namespace Nbnxm
  {
  
  /*! \brief Gets and returns the minimum i-list count for balancing based on the GPU used or env.var. when set */
-static int getMinimumIlistCountForGpuBalancing(gmx_nbnxn_gpu_t* nbnxmGpu)
+static int getMinimumIlistCountForGpuBalancing(gmx_nbnxm_gpu_t* nbnxmGpu)
  {
      int minimumIlistCount;
  
@@ -441,7 +440,7 @@ std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger&     mdlo
                          fr->nbfp, mimimumNumEnergyGroupNonbonded,
                          (useGpu || emulateGpu) ? 1 : gmx_omp_nthreads_get(emntNonbonded));
  
-    gmx_nbnxn_gpu_t* gpu_nbv                          = nullptr;
+    gmx_nbnxm_gpu_t* gpu_nbv                          = nullptr;
      int              minimumIlistCountForGpuBalancing = 0;
      if (useGpu)
      {
@@ -470,7 +469,7 @@ nonbonded_verlet_t::nonbonded_verlet_t(std::unique_ptr<PairlistSets>     pairlis
                                         std::unique_ptr<PairSearch>       pairSearch,
                                         std::unique_ptr<nbnxn_atomdata_t> nbat_in,
                                         const Nbnxm::KernelSetup&         kernelSetup,
-                                       gmx_nbnxn_gpu_t*                  gpu_nbv_ptr,
+                                       gmx_nbnxm_gpu_t*                  gpu_nbv_ptr,
                                         gmx_wallcycle*                    wcycle) :
      pairlistSets_(std::move(pairlistSets)),
      pairSearch_(std::move(pairSearch)),
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp

index 356afeb9fb6c9ffed4f25d3030dc9b205e2edccc..c795937a8441dbacd40faa246dcbf22eee1129fc 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
@@ -341,7 +341,7 @@ static inline cl_kernel selectPruneKernel(cl_kernel kernel_pruneonly[], bool fir
   *  OpenCL kernel objects are cached in nb. If the requested kernel is not
   *  found in the cache, it will be created and the cache will be updated.
   */
-static inline cl_kernel select_nbnxn_kernel(gmx_nbnxn_ocl_t* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune)
+static inline cl_kernel select_nbnxn_kernel(gmx_nbnxm_gpu_t* nb, int eeltype, int evdwtype, bool bDoEne, bool bDoPrune)
  {
      const char* kernel_name_to_run;
      cl_kernel*  kernel_ptr;
@@ -471,7 +471,7 @@ static void sync_ocl_event(cl_command_queue stream, cl_event* ocl_event)
  }
  
  /*! \brief Launch asynchronously the xq buffer host to device copy. */
-void gpu_copy_xq_to_gpu(gmx_nbnxn_ocl_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
+void gpu_copy_xq_to_gpu(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom, const AtomLocality atomLocality)
  {
      GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
  
@@ -575,7 +575,7 @@ void gpu_copy_xq_to_gpu(gmx_nbnxn_ocl_t* nb, const nbnxn_atomdata_t* nbatom, con
     misc_ops_done event to record the point in time when the above  operations
     are finished and synchronize with this event in the non-local stream.
   */
-void gpu_launch_kernel(gmx_nbnxn_ocl_t* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
+void gpu_launch_kernel(gmx_nbnxm_gpu_t* nb, const gmx::StepWorkload& stepWork, const Nbnxm::InteractionLocality iloc)
  {
      cl_atomdata_t*   adat   = nb->atdat;
      cl_nbparam_t*    nbp    = nb->nbparam;
@@ -713,7 +713,7 @@ static inline int calc_shmem_required_prune(const int num_threads_z)
   * Launch the pairlist prune only kernel for the given locality.
   * \p numParts tells in how many parts, i.e. calls the list will be pruned.
   */
-void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t* nb, const InteractionLocality iloc, const int numParts)
+void gpu_launch_kernel_pruneonly(gmx_nbnxm_gpu_t* nb, const InteractionLocality iloc, const int numParts)
  {
      cl_atomdata_t*   adat    = nb->atdat;
      cl_nbparam_t*    nbp     = nb->nbparam;
@@ -839,7 +839,7 @@ void gpu_launch_kernel_pruneonly(gmx_nbnxn_gpu_t* nb, const InteractionLocality
   * Launch asynchronously the download of nonbonded forces from the GPU
   * (and energies/shift forces if required).
   */
-void gpu_launch_cpyback(gmx_nbnxn_ocl_t*         nb,
+void gpu_launch_cpyback(gmx_nbnxm_gpu_t*         nb,
                          struct nbnxn_atomdata_t* nbatom,
                          const gmx::StepWorkload& stepWork,
                          const AtomLocality       aloc)
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp

index cb42ce9ccafdf8bc3ce7a7bd94b0da09ac385311..4943a8e0dd50afe97fcd040375e29ed4d9a2e4e4 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -52,11 +52,6 @@
  
  #include <cmath>
  
-// TODO We would like to move this down, but the way gmx_nbnxn_gpu_t
-//      is currently declared means this has to be before gpu_types.h
-#include "nbnxm_ocl_types.h"
-
-// TODO Remove this comment when the above order issue is resolved
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/gpu_utils/oclutils.h"
  #include "gromacs/hardware/gpu_hw_info.h"
@@ -79,6 +74,7 @@
  #include "gromacs/utility/smalloc.h"
  
  #include "nbnxm_ocl_internal.h"
+#include "nbnxm_ocl_types.h"
  
  namespace Nbnxm
  {
@@ -411,7 +407,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interacti
      {
          return;
      }
-    gmx_nbnxn_ocl_t* nb  = nbv->gpu_nbv;
+    gmx_nbnxm_gpu_t* nb  = nbv->gpu_nbv;
      cl_nbparam_t*    nbp = nb->nbparam;
  
      set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());
@@ -531,7 +527,7 @@ static void nbnxn_gpu_create_context(gmx_device_runtime_data_t* runtimeData,
  }
  
  /*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */
-static cl_kernel nbnxn_gpu_create_kernel(gmx_nbnxn_ocl_t* nb, const char* kernel_name)
+static cl_kernel nbnxn_gpu_create_kernel(gmx_nbnxm_gpu_t* nb, const char* kernel_name)
  {
      cl_kernel kernel;
      cl_int    cl_error;
@@ -548,7 +544,7 @@ static cl_kernel nbnxn_gpu_create_kernel(gmx_nbnxn_ocl_t* nb, const char* kernel
  
  /*! \brief Clears nonbonded shift force output array and energy outputs on the GPU.
   */
-static void nbnxn_ocl_clear_e_fshift(gmx_nbnxn_ocl_t* nb)
+static void nbnxn_ocl_clear_e_fshift(gmx_nbnxm_gpu_t* nb)
  {
  
      cl_int           cl_error;
@@ -581,7 +577,7 @@ static void nbnxn_ocl_clear_e_fshift(gmx_nbnxn_ocl_t* nb)
  }
  
  /*! \brief Initializes the OpenCL kernel pointers of the nbnxn_ocl_ptr_t input data structure. */
-static void nbnxn_gpu_init_kernels(gmx_nbnxn_ocl_t* nb)
+static void nbnxn_gpu_init_kernels(gmx_nbnxm_gpu_t* nb)
  {
      /* Init to 0 main kernel arrays */
      /* They will be later on initialized in select_nbnxn_kernel */
@@ -610,7 +606,7 @@ static void nbnxn_gpu_init_kernels(gmx_nbnxn_ocl_t* nb)
   *  Initializes members of the atomdata and nbparam structs and
   *  clears e/fshift output buffers.
   */
-static void nbnxn_ocl_init_const(gmx_nbnxn_ocl_t*                nb,
+static void nbnxn_ocl_init_const(gmx_nbnxm_gpu_t*                nb,
                                   const interaction_const_t*      ic,
                                   const PairlistParams&           listParams,
                                   const nbnxn_atomdata_t::Params& nbatParams)
@@ -621,14 +617,14 @@ static void nbnxn_ocl_init_const(gmx_nbnxn_ocl_t*                nb,
  
  
  //! This function is documented in the header file
-gmx_nbnxn_ocl_t* gpu_init(const gmx_device_info_t*   deviceInfo,
+gmx_nbnxm_gpu_t* gpu_init(const gmx_device_info_t*   deviceInfo,
                            const interaction_const_t* ic,
                            const PairlistParams&      listParams,
                            const nbnxn_atomdata_t*    nbat,
                            const int                  rank,
                            const gmx_bool             bLocalAndNonlocal)
  {
-    gmx_nbnxn_ocl_t*            nb;
+    gmx_nbnxm_gpu_t*            nb;
      cl_int                      cl_error;
      cl_command_queue_properties queue_properties;
  
@@ -732,7 +728,7 @@ gmx_nbnxn_ocl_t* gpu_init(const gmx_device_info_t*   deviceInfo,
  
  /*! \brief Clears the first natoms_clear elements of the GPU nonbonded force output array.
   */
-static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t* nb, int natoms_clear)
+static void nbnxn_ocl_clear_f(gmx_nbnxm_gpu_t* nb, int natoms_clear)
  {
      if (natoms_clear == 0)
      {
@@ -752,7 +748,7 @@ static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t* nb, int natoms_clear)
  }
  
  //! This function is documented in the header file
-void gpu_clear_outputs(gmx_nbnxn_ocl_t* nb, bool computeVirial)
+void gpu_clear_outputs(gmx_nbnxm_gpu_t* nb, bool computeVirial)
  {
      nbnxn_ocl_clear_f(nb, nb->atdat->natoms);
      /* clear shift force array and energies if the outputs were
@@ -769,7 +765,7 @@ void gpu_clear_outputs(gmx_nbnxn_ocl_t* nb, bool computeVirial)
  }
  
  //! This function is documented in the header file
-void gpu_init_pairlist(gmx_nbnxn_ocl_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
+void gpu_init_pairlist(gmx_nbnxm_gpu_t* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
  {
      char sbuf[STRLEN];
      // Timing accumulation should happen only if there was work to do
@@ -830,7 +826,7 @@ void gpu_init_pairlist(gmx_nbnxn_ocl_t* nb, const NbnxnPairlistGpu* h_plist, con
  }
  
  //! This function is documented in the header file
-void gpu_upload_shiftvec(gmx_nbnxn_ocl_t* nb, const nbnxn_atomdata_t* nbatom)
+void gpu_upload_shiftvec(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbatom)
  {
      cl_atomdata_t*   adat = nb->atdat;
      cl_command_queue ls   = nb->stream[InteractionLocality::Local];
@@ -845,7 +841,7 @@ void gpu_upload_shiftvec(gmx_nbnxn_ocl_t* nb, const nbnxn_atomdata_t* nbatom)
  }
  
  //! This function is documented in the header file
-void gpu_init_atomdata(gmx_nbnxn_ocl_t* nb, const nbnxn_atomdata_t* nbat)
+void gpu_init_atomdata(gmx_nbnxm_gpu_t* nb, const nbnxn_atomdata_t* nbat)
  {
      cl_int           cl_error;
      int              nalloc, natoms;
@@ -1005,7 +1001,7 @@ static void free_gpu_device_runtime_data(gmx_device_runtime_data_t* runData)
  }
  
  //! This function is documented in the header file
-void gpu_free(gmx_nbnxn_ocl_t* nb)
+void gpu_free(gmx_nbnxm_gpu_t* nb)
  {
      if (nb == nullptr)
      {
@@ -1106,7 +1102,7 @@ void gpu_free(gmx_nbnxn_ocl_t* nb)
  }
  
  //! This function is documented in the header file
-gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxn_ocl_t* nb)
+gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(gmx_nbnxm_gpu_t* nb)
  {
      return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
  }
@@ -1121,13 +1117,13 @@ void gpu_reset_timings(nonbonded_verlet_t* nbv)
  }
  
  //! This function is documented in the header file
-int gpu_min_ci_balanced(gmx_nbnxn_ocl_t* nb)
+int gpu_min_ci_balanced(gmx_nbnxm_gpu_t* nb)
  {
      return nb != nullptr ? gpu_min_ci_balanced_factor * nb->dev_info->compute_units : 0;
  }
  
  //! This function is documented in the header file
-gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxn_ocl_t* nb)
+gmx_bool gpu_is_kernel_ewald_analytical(const gmx_nbnxm_gpu_t* nb)
  {
      return ((nb->nbparam->eeltype == eelOclEWALD_ANA) || (nb->nbparam->eeltype == eelOclEWALD_ANA_TWIN));
  }
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp

index 36c55d9037729a549f97178fcb165ded46ec5751..3ea5cc186df8f33c38837c98f29dee13c91f4c80 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
@@ -167,7 +167,7 @@ static std::string makeDefinesForKernelTypes(bool bFastGen, int eeltype, int vdw
   *
   * Does not throw
   */
-void nbnxn_gpu_compile_kernels(gmx_nbnxn_ocl_t* nb)
+void nbnxn_gpu_compile_kernels(gmx_nbnxm_gpu_t* nb)
  {
      gmx_bool   bFastGen = TRUE;
      cl_program program  = nullptr;
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h

index b71628443bc9cb6b1f4a0dbf83b9061de8e177d2..303968ea96ef98672e5ef18b223ca16a3ef4599d 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h
@@ -274,7 +274,7 @@ typedef struct Nbnxm::gpu_timers_t cl_timers_t;
  /*! \internal
   * \brief Main data structure for OpenCL nonbonded force calculations.
   */
-struct gmx_nbnxn_ocl_t
+struct gmx_nbnxm_gpu_t
  {
      const gmx_device_info_t*          dev_info;    /**< OpenCL device information    */
      struct gmx_device_runtime_data_t* dev_rundata; /**< OpenCL runtime data (context, kernels) */
diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp

index 6dee69782e17ecd717a96b09e2b2b345dd389883..b76ad3e9a75a5537ef9a3870ab286fda58ff2329 100644 (file)
--- a/src/gromacs/nbnxm/pairlist.cpp
+++ b/src/gromacs/nbnxm/pairlist.cpp
@@ -54,6 +54,7 @@
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
  #include "gromacs/mdtypes/group.h"
  #include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/nbnxm/atomdata.h"
  #include "gromacs/nbnxm/gpu_data_mgmt.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/pbcutil/pbc.h"
@@ -65,7 +66,6 @@
  #include "gromacs/utility/listoflists.h"
  #include "gromacs/utility/smalloc.h"
  
-#include "atomdata.h"
  #include "boundingboxes.h"
  #include "clusterdistancekerneltype.h"
  #include "gridset.h"
diff --git a/src/gromacs/nbnxm/pairsearch.h b/src/gromacs/nbnxm/pairsearch.h

index 4263d9ba5bbe484d7c8779a346da97db7999a68c..357e78a19aaf751975ee8f07946b16b184bb6ba9 100644 (file)
--- a/src/gromacs/nbnxm/pairsearch.h
+++ b/src/gromacs/nbnxm/pairsearch.h
@@ -56,12 +56,12 @@
  
  #include "gromacs/domdec/domdec.h"
  #include "gromacs/math/vectypes.h"
+#include "gromacs/nbnxm/atomdata.h"
  #include "gromacs/timing/cyclecounter.h"
  #include "gromacs/utility/alignedallocator.h"
  #include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/real.h"
  
-#include "atomdata.h"
  #include "gridset.h"
  #include "pairlist.h"
  
diff --git a/src/gromacs/nbnxm/prunekerneldispatch.cpp b/src/gromacs/nbnxm/prunekerneldispatch.cpp

index 141b1ea9034d131224754efdc14a522a2c938e38..574d9c143ae851755d994a7b3461f23b57a10e11 100644 (file)
--- a/src/gromacs/nbnxm/prunekerneldispatch.cpp
+++ b/src/gromacs/nbnxm/prunekerneldispatch.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -41,6 +41,7 @@
  #include "gromacs/utility/gmxassert.h"
  
  #include "clusterdistancekerneltype.h"
+#include "nbnxm_gpu.h"
  #include "pairlistset.h"
  #include "pairlistsets.h"
  #include "kernels_reference/kernel_ref_prune.h"
author	Mark Abraham <mark.j.abraham@gmail.com>
	Tue, 14 Jan 2020 15:53:26 +0000 (16:53 +0100)
committer	Paul Bauer <paul.bauer.q@gmail.com>
	Thu, 16 Jan 2020 10:55:02 +0000 (11:55 +0100)
docs/doxygen/suppressions.txt		patch \| blob \| history
src/gromacs/mdlib/sim_util.cpp		patch \| blob \| history
src/gromacs/nbnxm/atomdata.cpp		patch \| blob \| history
src/gromacs/nbnxm/atomdata.h		patch \| blob \| history
src/gromacs/nbnxm/clusterdistancekerneltype.h		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_common.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_common_utils.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_data_mgmt.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_jit_support.h		patch \| blob \| history
src/gromacs/nbnxm/gpu_types.h	[deleted file]	patch \| blob \| history
src/gromacs/nbnxm/grid.cpp		patch \| blob \| history
src/gromacs/nbnxm/gridset.cpp		patch \| blob \| history
src/gromacs/nbnxm/gridsetdata.h		patch \| blob \| history
src/gromacs/nbnxm/kernel_common.h		patch \| blob \| history
src/gromacs/nbnxm/kerneldispatch.cpp		patch \| blob \| history
src/gromacs/nbnxm/nbnxm.cpp		patch \| blob \| history
src/gromacs/nbnxm/nbnxm.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_gpu.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_setup.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_types.h		patch \| blob \| history
src/gromacs/nbnxm/pairlist.cpp		patch \| blob \| history
src/gromacs/nbnxm/pairsearch.h		patch \| blob \| history
src/gromacs/nbnxm/prunekerneldispatch.cpp		patch \| blob \| history