Decouple task assignment from task execution

author Mark Abraham <mark.j.abraham@gmail.com>

Fri, 4 Aug 2017 11:52:33 +0000 (13:52 +0200)

committer Mark Abraham <mark.j.abraham@gmail.com>

Sun, 6 Aug 2017 15:58:05 +0000 (17:58 +0200)
author Mark Abraham <mark.j.abraham@gmail.com>
Fri, 4 Aug 2017 11:52:33 +0000 (13:52 +0200)
committer Mark Abraham <mark.j.abraham@gmail.com>
Sun, 6 Aug 2017 15:58:05 +0000 (17:58 +0200)
diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp

index 81a4289d74d2528d0ad5f8f8be83fff8e52388e4..1a5693187afe11c9846f9b5e63c6f81e250a3e2e 100644 (file)
--- a/src/gromacs/domdec/domdec.cpp
+++ b/src/gromacs/domdec/domdec.cpp
@@ -5388,28 +5388,24 @@ static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
  }
  #endif
  
-void dd_setup_dlb_resource_sharing(t_commrec           gmx_unused *cr,
-                                   const gmx_hw_info_t gmx_unused *hwinfo,
-                                   const gmx_hw_opt_t  gmx_unused &hw_opt)
+void dd_setup_dlb_resource_sharing(t_commrec            *cr,
+                                   int                   gpu_id)
  {
  #if GMX_MPI
      int           physicalnode_id_hash;
-    int           gpu_id;
      gmx_domdec_t *dd;
      MPI_Comm      mpi_comm_pp_physicalnode;
  
-    if (!(cr->duty & DUTY_PP) || hw_opt.gpu_opt.n_dev_use == 0)
+    if (!(cr->duty & DUTY_PP) || gpu_id < 0)
      {
-        /* Only PP nodes (currently) use GPUs.
-         * If we don't have GPUs, there are no resources to share.
+        /* Only ranks with short-ranged tasks (currently) use GPUs.
+         * If we don't have GPUs assigned, there are no resources to share.
           */
          return;
      }
  
      physicalnode_id_hash = gmx_physicalnode_id_hash();
  
-    gpu_id = get_gpu_device_id(hwinfo->gpu_info, &hw_opt.gpu_opt, cr->rank_pp_intranode);
-
      dd = cr->dd;
  
      if (debug)
@@ -5440,6 +5436,9 @@ void dd_setup_dlb_resource_sharing(t_commrec           gmx_unused *cr,
      {
          MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
      }
+#else
+    GMX_UNUSED_VALUE(cr);
+    GMX_UNUSED_VALUE(gpu_id);
  #endif
  }
  
diff --git a/src/gromacs/domdec/domdec.h b/src/gromacs/domdec/domdec.h

index 814fd614b59df11704769ef5f9f8faf0b9388888..d23c444a13feb9ab2149beee4c7d706f931eff56 100644 (file)
--- a/src/gromacs/domdec/domdec.h
+++ b/src/gromacs/domdec/domdec.h
@@ -61,7 +61,6 @@
  #include <stdio.h>
  
  #include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/hardware/hw_info.h"
  #include "gromacs/math/vectypes.h"
  #include "gromacs/mdlib/vsite.h"
  #include "gromacs/mdtypes/forcerec.h"
@@ -193,16 +192,15 @@ void dd_dlb_lock(struct gmx_domdec_t *dd);
  /*! \brief Clear a lock such that with DLB=auto DLB may get turned on later */
  void dd_dlb_unlock(struct gmx_domdec_t *dd);
  
-/*! \brief Set up communication for averaging GPU wait times over ranks
+/*! \brief Set up communication for averaging GPU wait times over domains
   *
   * When domains (PP MPI ranks) share a GPU, the individual GPU wait times
   * are meaningless, as it depends on the order in which tasks on the same
   * GPU finish. Therefore there wait times need to be averaged over the ranks
   * sharing the same GPU. This function sets up the communication for that.
   */
-void dd_setup_dlb_resource_sharing(struct t_commrec           *cr,
-                                   const gmx_hw_info_t        *hwinfo,
-                                   const gmx_hw_opt_t         &hw_opt);
+void dd_setup_dlb_resource_sharing(t_commrec           *cr,
+                                   int                  gpu_id);
  
  /*! \brief Collects local rvec arrays \p lv to \p v on the master rank */
  void dd_collect_vec(struct gmx_domdec_t *dd,
diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu

index 2ad9ec26bae12a7c7df95ca8de12d2f6a8975ab7..4ae442a374ba5fe5770b92223fd9927c6b118849 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils.cu
+++ b/src/gromacs/gpu_utils/gpu_utils.cu
@@ -280,25 +280,22 @@ static bool getApplicationClocks(const gmx_device_info_t *cuda_dev,
  
  /*! \brief Tries to set application clocks for the GPU with the given index.
   *
- * The variable \gpuid is the index of the GPU in the gpu_info.cuda_dev array
- * to handle the application clocks for. Application clocks are set to the
- * max supported value to increase performance if application clock permissions
- * allow this. For future GPU architectures a more sophisticated scheme might be
- * required.
+ * Application clocks are set to the max supported value to increase
+ * performance if application clock permissions allow this. For future
+ * GPU architectures a more sophisticated scheme might be required.
   *
   * \todo Refactor this into a detection phase and a work phase. Also
   * refactor to remove compile-time dependence on logging header.
   *
   * \param     mdlog         log file to write to
- * \param[in] gpuid         index of the GPU to set application clocks for
- * \param[in] gpu_info      GPU info of all detected devices in the system.
+ * \param[in] cuda_dev      GPU device info for the GPU in use
   * \returns                 true if no error occurs during application clocks handling.
   */
  static gmx_bool init_gpu_application_clocks(
-        const gmx::MDLogger &mdlog, int gmx_unused gpuid,
-        const gmx_gpu_info_t gmx_unused *gpu_info)
+        const gmx::MDLogger &mdlog,
+        gmx_device_info_t   *cuda_dev)
  {
-    const cudaDeviceProp *prop                        = &gpu_info->gpu_dev[gpuid].prop;
+    const cudaDeviceProp *prop                        = &cuda_dev->prop;
      int                   cuda_compute_capability     = prop->major * 10 + prop->minor;
      gmx_bool              bGpuCanUseApplicationClocks =
          ((0 == gmx_wcmatch("*Tesla*", prop->name) && cuda_compute_capability >= 35 ) ||
@@ -344,8 +341,6 @@ static gmx_bool init_gpu_application_clocks(
          return false;
      }
  
-    gmx_device_info_t *cuda_dev = &(gpu_info->gpu_dev[gpuid]);
-
      if (!addNVMLDeviceId(cuda_dev))
      {
          return false;
@@ -453,53 +448,36 @@ static gmx_bool reset_gpu_application_clocks(const gmx_device_info_t gmx_unused
  #endif /* HAVE_NVML_APPLICATION_CLOCKS */
  }
  
-void init_gpu(const gmx::MDLogger &mdlog, int rank, int mygpu,
-              const struct gmx_gpu_info_t *gpu_info,
-              const struct gmx_gpu_opt_t *gpu_opt)
+void init_gpu(const gmx::MDLogger &mdlog, int rank,
+              gmx_device_info_t *deviceInfo)
  {
      cudaError_t stat;
      char        sbuf[STRLEN];
-    int         gpuid;
-
-    assert(gpu_info);
-    assert(gpu_opt);
-
-    if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
-    {
-        snprintf(sbuf, STRLEN, "On rank %d trying to initialize an non-existent GPU: "
-                 "there are %d selected GPU(s), but #%d was requested.",
-                 rank, gpu_opt->n_dev_use, mygpu);
-        gmx_incons(sbuf);
-    }
  
-    gpuid = gpu_info->gpu_dev[gpu_opt->dev_use[mygpu]].id;
+    assert(deviceInfo);
  
-    stat = cudaSetDevice(gpuid);
+    stat = cudaSetDevice(deviceInfo->id);
      if (stat != cudaSuccess)
      {
          snprintf(sbuf, STRLEN, "On rank %d failed to initialize GPU #%d",
-                 rank, mygpu);
+                 rank, deviceInfo->id);
          CU_RET_ERR(stat, sbuf);
      }
  
      if (debug)
      {
-        fprintf(stderr, "Initialized GPU ID #%d: %s\n", gpuid, gpu_info->gpu_dev[gpuid].prop.name);
+        fprintf(stderr, "Initialized GPU ID #%d: %s\n", deviceInfo->id, deviceInfo->prop.name);
      }
  
      //Ignoring return value as NVML errors should be treated not critical.
-    init_gpu_application_clocks(mdlog, gpuid, gpu_info);
+    init_gpu_application_clocks(mdlog, deviceInfo);
  }
  
-gmx_bool free_cuda_gpu(
-        int gmx_unused mygpu, char *result_str,
-        const gmx_gpu_info_t gmx_unused *gpu_info,
-        const gmx_gpu_opt_t gmx_unused *gpu_opt
-        )
+gmx_bool free_cuda_gpu(const gmx_device_info_t *deviceInfo,
+                       char                    *result_str)
  {
      cudaError_t  stat;
      gmx_bool     reset_gpu_application_clocks_status = true;
-    int          gpuid;
  
      assert(result_str);
  
@@ -511,10 +489,9 @@ gmx_bool free_cuda_gpu(
          fprintf(stderr, "Cleaning up context on GPU ID #%d\n", gpuid);
      }
  
-    gpuid = gpu_opt ? gpu_opt->dev_use[mygpu] : -1;
-    if (gpuid != -1)
+    if (deviceInfo != nullptr)
      {
-        reset_gpu_application_clocks_status = reset_gpu_application_clocks( &(gpu_info->gpu_dev[gpuid]) );
+        reset_gpu_application_clocks_status = reset_gpu_application_clocks(deviceInfo);
      }
  
      stat = cudaDeviceReset();
@@ -522,6 +499,16 @@ gmx_bool free_cuda_gpu(
      return (stat == cudaSuccess) && reset_gpu_application_clocks_status;
  }
  
+gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
+                                 int                   deviceId)
+{
+    if (deviceId < 0 || deviceId >= gpu_info.n_dev)
+    {
+        gmx_incons("Invalid GPU deviceId requested");
+    }
+    return &gpu_info.gpu_dev[deviceId];
+}
+
  /*! \brief Returns true if the gpu characterized by the device properties is
   *  supported by the native gpu acceleration.
   *
diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h

index 0e9d04f66641ca2d2b7aed5e70f14538d260eaa6..2bb1e21f0c6488621db7207c16e0d38813ba5933 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils.h
+++ b/src/gromacs/gpu_utils/gpu_utils.h
@@ -50,6 +50,7 @@
  #include "gromacs/gpu_utils/gpu_macros.h"
  #include "gromacs/utility/basedefinitions.h"
  
+struct gmx_device_info_t;
  struct gmx_gpu_info_t;
  struct gmx_gpu_opt_t;
  
@@ -101,16 +102,14 @@ const char *getGpuCompatibilityDescription(const gmx_gpu_info_t &GPU_FUNC_ARGUME
  GPU_FUNC_QUALIFIER
  void free_gpu_info(const struct gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info)) GPU_FUNC_TERM
  
-/*! \brief Initializes the GPU with the given index.
+/*! \brief Initializes the GPU described by \c deviceInfo.
   *
- * The varible \p mygpu is the index of the GPU to initialize in the
- * gpu_info.gpu_dev array.
+ * TODO Doxygen complains about these - probably a Doxygen bug, since
+ * the patterns here are the same as elsewhere in this header.
   *
- * \param      mdlog        log file to write to
- * \param[in]  rank         MPI rank of this process (for error output)
- * \param[in]  mygpu        index of the GPU to initialize
- * \param[in] gpu_info      GPU info of all detected devices in the system.
- * \param[in] gpu_opt       options for using the GPUs in gpu_info
+ *  param[in]    mdlog        log file to write to
+ *  param[in]    rank         MPI rank of this process (for error output)
+ * \param[inout] deviceInfo   device info of the GPU to initialize
   *
   * Issues a fatal error for any critical errors that occur during
   * initialization.
@@ -118,27 +117,33 @@ void free_gpu_info(const struct gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info)) GPU
  GPU_FUNC_QUALIFIER
  void init_gpu(const gmx::MDLogger &GPU_FUNC_ARGUMENT(mdlog),
                int GPU_FUNC_ARGUMENT(rank),
-              int GPU_FUNC_ARGUMENT(mygpu),
-              const struct gmx_gpu_info_t *GPU_FUNC_ARGUMENT(gpu_info),
-              const gmx_gpu_opt_t *GPU_FUNC_ARGUMENT(gpu_opt)) GPU_FUNC_TERM
+              gmx_device_info_t *GPU_FUNC_ARGUMENT(deviceInfo)) GPU_FUNC_TERM
  
  /*! \brief Frees up the CUDA GPU used by the active context at the time of calling.
   *
   * The context is explicitly destroyed and therefore all data uploaded to the GPU
   * is lost. This should only be called when none of this data is required anymore.
   *
- * \param[in]  mygpu        index of the GPU clean up for
+ * \param[in]  deviceInfo   device info of the GPU to clean up for
   * \param[out] result_str   the message related to the error that occurred
   *                          during the initialization (if there was any).
- * \param[in] gpu_info      GPU info of all detected devices in the system.
- * \param[in] gpu_opt       options for using the GPUs in gpu_info
+ *
   * \returns                 true if no error occurs during the freeing.
   */
  CUDA_FUNC_QUALIFIER
-gmx_bool free_cuda_gpu(int CUDA_FUNC_ARGUMENT(mygpu),
-                       char *CUDA_FUNC_ARGUMENT(result_str),
-                       const gmx_gpu_info_t *CUDA_FUNC_ARGUMENT(gpu_info),
-                       const gmx_gpu_opt_t *CUDA_FUNC_ARGUMENT(gpu_opt)) CUDA_FUNC_TERM_WITH_RETURN(TRUE)
+gmx_bool free_cuda_gpu(const gmx_device_info_t *CUDA_FUNC_ARGUMENT(deviceInfo),
+                       char *CUDA_FUNC_ARGUMENT(result_str)) CUDA_FUNC_TERM_WITH_RETURN(TRUE)
+
+/*! \brief Return a pointer to the device info for \c deviceId
+ *
+ * \param[in] gpu_info      GPU info of all detected devices in the system.
+ * \param[in] deviceId      ID for the GPU device requested.
+ *
+ * \returns                 Pointer to the device info for \c deviceId.
+ */
+GPU_FUNC_QUALIFIER
+gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
+                                 int GPU_FUNC_ARGUMENT(deviceId)) GPU_FUNC_TERM_WITH_RETURN(NULL)
  
  /*! \brief Returns the device ID of the CUDA GPU currently in use.
   *
diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp

index e030186f78ac3bf46fe4a5c5869a6073ae4b47d9..efbb796879d2846b670cce914ad0dc80f231c05e 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
@@ -393,29 +393,17 @@ void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int ind
  
  //! This function is documented in the header file
  void init_gpu(const gmx::MDLogger               & /*mdlog*/,
-              int                               rank,
-              int                               mygpu,
-              const gmx_gpu_info_t             *gpu_info,
-              const gmx_gpu_opt_t              *gpu_opt
-              )
+              int                               /* rank */,
+              gmx_device_info_t                *deviceInfo)
  {
-    assert(gpu_opt);
-
-    if (mygpu < 0 || mygpu >= gpu_opt->n_dev_use)
-    {
-        char        sbuf[STRLEN];
-        sprintf(sbuf, "On rank %d trying to initialize an non-existent GPU: "
-                "there are %d selected GPU(s), but #%d was requested.",
-                rank, gpu_opt->n_dev_use, mygpu);
-        gmx_incons(sbuf);
-    }
+    assert(deviceInfo);
  
      // If the device is NVIDIA, for safety reasons we disable the JIT
      // caching as this is known to be broken at least until driver 364.19;
      // the cache does not always get regenerated when the source code changes,
      // e.g. if the path to the kernel sources remains the same
  
-    if (gpu_info->gpu_dev[mygpu].vendor_e == OCL_VENDOR_NVIDIA)
+    if (deviceInfo->vendor_e == OCL_VENDOR_NVIDIA)
      {
          // Ignore return values, failing to set the variable does not mean
          // that something will go wrong later.
@@ -439,6 +427,17 @@ int get_gpu_device_id(const gmx_gpu_info_t  &,
      return gpu_opt->dev_use[idx];
  }
  
+//! This function is documented in the header file
+gmx_device_info_t *getDeviceInfo(const gmx_gpu_info_t &gpu_info,
+                                 int                   deviceId)
+{
+    if (deviceId < 0 || deviceId >= gpu_info.n_dev)
+    {
+        gmx_incons("Invalid GPU deviceId requested");
+    }
+    return &gpu_info.gpu_dev[deviceId];
+}
+
  //! This function is documented in the header file
  char* get_ocl_gpu_device_name(const gmx_gpu_info_t *gpu_info,
                                const gmx_gpu_opt_t  *gpu_opt,
diff --git a/src/gromacs/gpu_utils/ocl_compiler.h b/src/gromacs/gpu_utils/ocl_compiler.h

index b5db76eb4cca5222a440ed4a17a9344fca999333..33e64941ceb107215c8ab192608ede8319ca1fb4 100644 (file)
--- a/src/gromacs/gpu_utils/ocl_compiler.h
+++ b/src/gromacs/gpu_utils/ocl_compiler.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -47,7 +47,6 @@
  #include <string>
  
  #include "gromacs/gpu_utils/oclutils.h"
-#include "gromacs/hardware/gpu_hw_info.h"
  
  namespace gmx
  {
diff --git a/src/gromacs/mdlib/force.h b/src/gromacs/mdlib/force.h

index 7874e723ad970fd05b46d3b69a262c88516954ab..93cd302140f55506ec8134256fd2e671fe12ddc8 100644 (file)
--- a/src/gromacs/mdlib/force.h
+++ b/src/gromacs/mdlib/force.h
@@ -45,6 +45,7 @@
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/utility/arrayref.h"
  
+struct gmx_device_info_t;
  struct gmx_edsam;
  struct gmx_gpu_info_t;
  struct gmx_groups_t;
@@ -214,7 +215,6 @@ void do_force_lowlevel(t_forcerec   *fr,
  
  void free_gpu_resources(const t_forcerec            *fr,
                          const t_commrec             *cr,
-                        const gmx_gpu_info_t        *gpu_info,
-                        const gmx_gpu_opt_t         *gpu_opt);
+                        const gmx_device_info_t     *deviceInfo);
  
  #endif
diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp

index c4ed97ebf7cd09b0e2fcedab48db461627d2d159..dd37fe8bf0421b0e04a0ae141978003ac7fe9ff4 100644 (file)
--- a/src/gromacs/mdlib/forcerec.cpp
+++ b/src/gromacs/mdlib/forcerec.cpp
@@ -2024,6 +2024,10 @@ init_interaction_const(FILE                       *fp,
      *interaction_const = ic;
  }
  
+/* TODO deviceInfo should be logically const, but currently
+ * init_gpu modifies it to set up NVML support. This could
+ * happen during the detection phase, and deviceInfo could
+ * the become const. */
  static void init_nb_verlet(FILE                *fp,
                             const gmx::MDLogger &mdlog,
                             nonbonded_verlet_t **nb_verlet,
@@ -2031,7 +2035,8 @@ static void init_nb_verlet(FILE                *fp,
                             const t_inputrec    *ir,
                             const t_forcerec    *fr,
                             const t_commrec     *cr,
-                           const char          *nbpu_opt)
+                           const char          *nbpu_opt,
+                           gmx_device_info_t   *deviceInfo)
  {
      nonbonded_verlet_t *nbv;
      int                 i;
@@ -2044,17 +2049,14 @@ static void init_nb_verlet(FILE                *fp,
      snew(nbv, 1);
  
      nbv->emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr);
-    nbv->bUseGPU    = (fr->gpu_opt->n_dev_use > 0);
+    nbv->bUseGPU    = deviceInfo != nullptr;
+
      GMX_RELEASE_ASSERT(!(nbv->emulateGpu && nbv->bUseGPU), "When GPU emulation is active, there cannot be a GPU assignment");
  
      if (nbv->bUseGPU)
      {
-        /* This PP MPI rank uses the GPU that the GPU assignment
-         * prepared for it, which is the entry in gpu_opt->dev_use
-         * corresponding to the index of this PP MPI rank within the
-         * set of such ranks on this node. */
-        init_gpu(mdlog, cr->nodeid, cr->rank_pp_intranode,
-                 &fr->hwinfo->gpu_info, fr->gpu_opt);
+        /* Use the assigned GPU. */
+        init_gpu(mdlog, cr->nodeid, deviceInfo);
      }
  
      nbv->nbs             = nullptr;
@@ -2170,11 +2172,9 @@ static void init_nb_verlet(FILE                *fp,
          /* init the NxN GPU data; the last argument tells whether we'll have
           * both local and non-local NB calculation on GPU */
          nbnxn_gpu_init(&nbv->gpu_nbv,
-                       &fr->hwinfo->gpu_info,
-                       fr->gpu_opt,
+                       deviceInfo,
                         fr->ic,
                         nbv->grp,
-                       cr->rank_pp_intranode,
                         cr->nodeid,
                         (nbv->ngrp > 1) && !bHybridGPURun);
  
@@ -2245,6 +2245,7 @@ void init_forcerec(FILE                *fp,
                     const char          *tabpfn,
                     const t_filenm      *tabbfnm,
                     const char          *nbpu_opt,
+                   gmx_device_info_t   *deviceInfo,
                     gmx_bool             bNoSolvOpt,
                     real                 print_force)
  {
@@ -2258,15 +2259,6 @@ void init_forcerec(FILE                *fp,
      gmx_bool       bFEP_NonBonded;
      int           *nm_ind, egp_flags;
  
-    if (fr->hwinfo == nullptr)
-    {
-        /* Detect hardware, gather information.
-         * In mdrun, hwinfo has already been set before calling init_forcerec.
-         * Here we ignore GPUs, as tools will not use them anyhow.
-         */
-        fr->hwinfo = gmx_detect_hardware(mdlog, cr, FALSE);
-    }
-
      /* By default we turn SIMD kernels on, but it might be turned off further down... */
      fr->use_simd_kernels = TRUE;
  
@@ -3136,7 +3128,7 @@ void init_forcerec(FILE                *fp,
              GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw, "With Verlet lists and no PME rcoulomb and rvdw should be identical");
          }
  
-        init_nb_verlet(fp, mdlog, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
+        init_nb_verlet(fp, mdlog, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt, deviceInfo);
      }
  
      if (ir->eDispCorr != edispcNO)
@@ -3175,10 +3167,9 @@ void pr_forcerec(FILE *fp, t_forcerec *fr)
   * in this run because the PME ranks have no knowledge of whether GPUs
   * are used or not, but all ranks need to enter the barrier below.
   */
-void free_gpu_resources(const t_forcerec     *fr,
-                        const t_commrec      *cr,
-                        const gmx_gpu_info_t *gpu_info,
-                        const gmx_gpu_opt_t  *gpu_opt)
+void free_gpu_resources(const t_forcerec        *fr,
+                        const t_commrec         *cr,
+                        const gmx_device_info_t *deviceInfo)
  {
      gmx_bool bIsPPrankUsingGPU;
      char     gpu_err_str[STRLEN];
@@ -3213,7 +3204,7 @@ void free_gpu_resources(const t_forcerec     *fr,
      if (bIsPPrankUsingGPU)
      {
          /* uninitialize GPU (by destroying the context) */
-        if (!free_cuda_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
+        if (!free_cuda_gpu(deviceInfo, gpu_err_str))
          {
              gmx_warning("On rank %d failed to free GPU #%d: %s",
                          cr->nodeid, get_current_cuda_gpu_device_id(), gpu_err_str);
diff --git a/src/gromacs/mdlib/forcerec.h b/src/gromacs/mdlib/forcerec.h

index 5eaafc4e49f25f82d7fd7bcf856309aa160b61ef..c235a8701560dc752e598dd7799853271457c928 100644 (file)
--- a/src/gromacs/mdlib/forcerec.h
+++ b/src/gromacs/mdlib/forcerec.h
@@ -44,6 +44,7 @@
  #include "gromacs/mdtypes/forcerec.h"
  #include "gromacs/timing/wallcycle.h"
  
+struct gmx_device_info_t;
  struct t_commrec;
  struct t_fcdata;
  struct t_filenm;
@@ -107,6 +108,7 @@ void init_interaction_const_tables(FILE                   *fp,
   * \param[in]  tabpfn      Table potential file for pair interactions
   * \param[in]  tabbfnm     Table potential files for bonded interactions
   * \param[in]  nbpu_opt    Nonbonded Processing Unit (GPU/CPU etc.)
+ * \param[in]  deviceInfo  Info about GPU device to use for short-ranged work
   * \param[in]  bNoSolvOpt  Do not use solvent optimization
   * \param[in]  print_force Print forces for atoms with force >= print_force
   */
@@ -122,6 +124,7 @@ void init_forcerec(FILE                   *fplog,
                     const char             *tabpfn,
                     const t_filenm         *tabbfnm,
                     const char             *nbpu_opt,
+                   gmx_device_info_t      *deviceInfo,
                     gmx_bool                bNoSolvOpt,
                     real                    print_force);
  
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu

index 35774bec6f1df0e3b60c7a94c671fc2020b00b78..739b582c1e895e0a99f33818f664a286ed7ffd49 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
@@ -148,7 +148,7 @@ static bool always_prune = (getenv("GMX_GPU_ALWAYS_PRUNE") != NULL);
  
  
  /*! Returns the number of blocks to be used for the nonbonded GPU kernel. */
-static inline int calc_nb_kernel_nblock(int nwork_units, gmx_device_info_t *dinfo)
+static inline int calc_nb_kernel_nblock(int nwork_units, const gmx_device_info_t *dinfo)
  {
      int max_grid_x_size;
  
@@ -231,7 +231,7 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int
                                                         int                                  evdwtype,
                                                         bool                                 bDoEne,
                                                         bool                                 bDoPrune,
-                                                       struct gmx_device_info_t gmx_unused *devInfo)
+                                                       const gmx_device_info_t gmx_unused  *devInfo)
  {
      nbnxn_cu_kfunc_ptr_t res;
  
@@ -271,7 +271,7 @@ static inline nbnxn_cu_kfunc_ptr_t select_nbnxn_kernel(int
  }
  
  /*! Calculates the amount of shared memory required by the CUDA kernel in use. */
-static inline int calc_shmem_required(const int num_threads_z, gmx_device_info_t gmx_unused *dinfo, const cu_nbparam_t *nbp)
+static inline int calc_shmem_required(const int num_threads_z, const gmx_device_info_t gmx_unused *dinfo, const cu_nbparam_t *nbp)
  {
      int shmem;
  
@@ -725,7 +725,7 @@ const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_coulomb_
  
  /*! Set up the cache configuration for the non-bonded kernels,
   */
-void nbnxn_cuda_set_cacheconfig(gmx_device_info_t *devinfo)
+void nbnxn_cuda_set_cacheconfig(const gmx_device_info_t *devinfo)
  {
      cudaError_t stat;
  
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

index e29fccb3031cd04db85f2ee0b238950ea27298f3..52e255de560dc788b5f363beb5f303b8e9e93251 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -78,7 +78,7 @@ static bool bUseCudaEventBlockingSync = false; /* makes the CPU thread block */
  static unsigned int gpu_min_ci_balanced_factor = 44;
  
  /* Functions from nbnxn_cuda.cu */
-extern void nbnxn_cuda_set_cacheconfig(gmx_device_info_t *devinfo);
+extern void nbnxn_cuda_set_cacheconfig(const gmx_device_info_t *devinfo);
  extern const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_texref();
  extern const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_nbfp_comb_texref();
  extern const struct texture<float, 1, cudaReadModeElementType> &nbnxn_cuda_get_coulomb_tab_texref();
@@ -584,19 +584,15 @@ static void nbnxn_cuda_init_const(gmx_nbnxn_cuda_t               *nb,
  }
  
  void nbnxn_gpu_init(gmx_nbnxn_cuda_t         **p_nb,
-                    const gmx_gpu_info_t      *gpu_info,
-                    const gmx_gpu_opt_t       *gpu_opt,
+                    const gmx_device_info_t   *deviceInfo,
                      const interaction_const_t *ic,
                      nonbonded_verlet_group_t  *nbv_grp,
-                    int                        my_gpu_index,
                      int                        /*rank*/,
                      gmx_bool                   bLocalAndNonlocal)
  {
      cudaError_t       stat;
      gmx_nbnxn_cuda_t *nb;
  
-    assert(gpu_info);
-
      if (p_nb == NULL)
      {
          return;
@@ -624,7 +620,7 @@ void nbnxn_gpu_init(gmx_nbnxn_cuda_t         **p_nb,
      init_plist(nb->plist[eintLocal]);
  
      /* set device info, just point it to the right GPU among the detected ones */
-    nb->dev_info = &gpu_info->gpu_dev[get_gpu_device_id(*gpu_info, gpu_opt, my_gpu_index)];
+    nb->dev_info = deviceInfo;
  
      /* local/non-local GPU streams */
      stat = cudaStreamCreate(&nb->stream[eintLocal]);
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h

index 09eccc34c0d32d2aab911ea08df8b3058c52f612..9bdb258d06ae8e7040ecaab844a1f9cf985f8507 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h
@@ -237,7 +237,7 @@ struct cu_timers
   */
  struct gmx_nbnxn_cuda_t
  {
-    struct gmx_device_info_t *dev_info;       /**< CUDA device information                              */
+    const gmx_device_info_t  *dev_info;       /**< CUDA device information                              */
      bool                      bUseTwoStreams; /**< true if doing both local/non-local NB work on GPU    */
      cu_atomdata_t            *atdat;          /**< atom data                                            */
      cu_nbparam_t             *nbparam;        /**< parameters required for the non-bonded calc.         */
diff --git a/src/gromacs/mdlib/nbnxn_gpu_data_mgmt.h b/src/gromacs/mdlib/nbnxn_gpu_data_mgmt.h

index 0e16d5cfd9f6155d92b21eeb69f88285a1ec6716..cddb0f05a496cbba19f74386cbf24eeeb04c9788 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_gpu_data_mgmt.h
+++ b/src/gromacs/mdlib/nbnxn_gpu_data_mgmt.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -56,16 +56,13 @@ struct nbnxn_pairlist_t;
  struct nbnxn_atomdata_t;
  struct gmx_wallclock_gpu_t;
  struct gmx_gpu_info_t;
-struct gmx_gpu_opt_t;
  
  /** Initializes the data structures related to GPU nonbonded calculations. */
  GPU_FUNC_QUALIFIER
  void nbnxn_gpu_init(gmx_nbnxn_gpu_t gmx_unused            **p_nb,
-                    const struct gmx_gpu_info_t gmx_unused *gpu_info,
-                    const struct gmx_gpu_opt_t gmx_unused  *gpu_opt,
+                    const gmx_device_info_t gmx_unused     *deviceInfo,
                      const interaction_const_t gmx_unused   *ic,
                      nonbonded_verlet_group_t gmx_unused    *nbv_grp,
-                    int gmx_unused                          my_gpu_index,
                      int gmx_unused                          rank,
                      /* true if both local and non-local are done on GPU */
                      gmx_bool gmx_unused                     bLocalAndNonlocal) GPU_FUNC_TERM
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp

index 633e5d325264c613e7bcdf30f250de3c263c2e32..863927f0a4fc689593f0f5019c9ea1726630b0fc 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp
@@ -94,7 +94,7 @@ static bool always_prune = (getenv("GMX_GPU_ALWAYS_PRUNE") != NULL);
  
  /*! \brief Validates the input global work size parameter.
   */
-static inline void validate_global_work_size(size_t *global_work_size, int work_dim, gmx_device_info_t *dinfo)
+static inline void validate_global_work_size(size_t *global_work_size, int work_dim, const gmx_device_info_t *dinfo)
  {
      cl_uint device_size_t_size_bits;
      cl_uint host_size_t_size_bits;
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp

index 23566d8db8b379eef7a5f11093301c55e3fa0b0c..a7b95e49a98b03f2275caa5baa2fb0978117ff0a 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
@@ -687,11 +687,9 @@ static void nbnxn_ocl_init_const(gmx_nbnxn_ocl_t                *nb,
  
  //! This function is documented in the header file
  void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
-                    const gmx_gpu_info_t      *gpu_info,
-                    const gmx_gpu_opt_t       *gpu_opt,
+                    const gmx_device_info_t   *deviceInfo,
                      const interaction_const_t *ic,
                      nonbonded_verlet_group_t  *nbv_grp,
-                    int                        my_gpu_index,
                      int                        rank,
                      gmx_bool                   bLocalAndNonlocal)
  {
@@ -699,8 +697,6 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
      cl_int                      cl_error;
      cl_command_queue_properties queue_properties;
  
-    assert(gpu_info);
-    assert(gpu_opt);
      assert(ic);
  
      if (p_nb == NULL)
@@ -723,7 +719,7 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
      snew(nb->timings, 1);
  
      /* set device info, just point it to the right GPU among the detected ones */
-    nb->dev_info = gpu_info->gpu_dev + gpu_opt->dev_use[my_gpu_index];
+    nb->dev_info = deviceInfo;
      snew(nb->dev_rundata, 1);
  
      /* init to NULL the debug buffer */
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h

index 0fa212584e7641ac0200981124aa1782c19b061c..7d81906d2ac5483b02a3497a821c53d5ea941582 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -271,7 +271,7 @@ typedef struct cl_timers
   */
  struct gmx_nbnxn_ocl_t
  {
-    struct gmx_device_info_t         *dev_info;    /**< OpenCL device information                              */
+    const gmx_device_info_t          *dev_info;    /**< OpenCL device information                              */
      struct gmx_device_runtime_data_t *dev_rundata; /**< OpenCL runtime data (context, kernels)                 */
  
      /**< Pointers to non-bonded kernel functions
diff --git a/src/gromacs/mdtypes/forcerec.h b/src/gromacs/mdtypes/forcerec.h

index 7cd11fad44f1b5c2da7f08bc706a1cf9b0d5227d..62f5030711ef961d34d5014ed27ea4c696dc2ee3 100644 (file)
--- a/src/gromacs/mdtypes/forcerec.h
+++ b/src/gromacs/mdtypes/forcerec.h
@@ -59,8 +59,6 @@ struct t_forcetable;
  struct t_nblist;
  struct t_nblists;
  struct t_QMMMrec;
-struct gmx_hw_info_t;
-struct gmx_gpu_opt_t;
  
  #ifdef __cplusplus
  extern "C" {
@@ -174,8 +172,6 @@ struct t_forcerec {
      rvec                        posres_com;
      rvec                        posres_comB;
  
-    const struct gmx_hw_info_t *hwinfo;
-    const struct gmx_gpu_opt_t *gpu_opt;
      gmx_bool                    use_simd_kernels;
  
      /* Interaction for calculated in kernels. In many cases this is similar to
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp

index 7cc5d5ad578ca9bc80db1d6b2494cc390ea1f942..c798109795a033f8f3abea2e8c326c99a837fe36 100644 (file)
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -1144,10 +1144,21 @@ int Mdrunner::mdrunner()
      check_resource_division_efficiency(hwinfo, hw_opt, hw_opt.gpu_opt.n_dev_use, Flags & MD_NTOMPSET,
                                         cr, mdlog);
  
+    gmx_device_info_t *shortRangedDeviceInfo = nullptr;
+    int                shortRangedDeviceId   = -1;
+    if (cr->duty & DUTY_PP)
+    {
+        if (willUsePhysicalGpu)
+        {
+            shortRangedDeviceId   = hw_opt.gpu_opt.dev_use[cr->nrank_pp_intranode];
+            shortRangedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, shortRangedDeviceId);
+        }
+    }
+
      if (DOMAINDECOMP(cr))
      {
          /* When we share GPUs over ranks, we need to know this for the DLB */
-        dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt);
+        dd_setup_dlb_resource_sharing(cr, shortRangedDeviceId);
      }
  
      /* getting number of PP/PME threads
@@ -1187,8 +1198,6 @@ int Mdrunner::mdrunner()
  
          /* Initiate forcerecord */
          fr                 = mk_forcerec();
-        fr->hwinfo         = hwinfo;
-        fr->gpu_opt        = &hw_opt.gpu_opt;
          fr->forceProviders = mdModules.initForceProviders();
          init_forcerec(fplog, mdlog, fr, fcd,
                        inputrec, mtop, cr, box,
@@ -1196,6 +1205,7 @@ int Mdrunner::mdrunner()
                        opt2fn("-tablep", nfile, fnm),
                        getFilenm("-tableb", nfile, fnm),
                        nbpu_opt,
+                      shortRangedDeviceInfo,
                        FALSE,
                        pforce);
  
@@ -1425,7 +1435,7 @@ int Mdrunner::mdrunner()
      }
  
      /* Free GPU memory and context */
-    free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : nullptr);
+    free_gpu_resources(fr, cr, shortRangedDeviceInfo);
  
      if (doMembed)
      {
author	Mark Abraham <mark.j.abraham@gmail.com>
	Fri, 4 Aug 2017 11:52:33 +0000 (13:52 +0200)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Sun, 6 Aug 2017 15:58:05 +0000 (17:58 +0200)
src/gromacs/domdec/domdec.cpp		patch \| blob \| history
src/gromacs/domdec/domdec.h		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils.cu		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils.h		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils_ocl.cpp		patch \| blob \| history
src/gromacs/gpu_utils/ocl_compiler.h		patch \| blob \| history
src/gromacs/mdlib/force.h		patch \| blob \| history
src/gromacs/mdlib/forcerec.cpp		patch \| blob \| history
src/gromacs/mdlib/forcerec.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu		patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_gpu_data_mgmt.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl.cpp		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp		patch \| blob \| history
src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_types.h		patch \| blob \| history
src/gromacs/mdtypes/forcerec.h		patch \| blob \| history
src/programs/mdrun/runner.cpp		patch \| blob \| history