From: Alan Gray <alang@nvidia.com>
Date: Wed, 4 Sep 2019 12:41:21 +0000 (-0700)
Subject: Enable GPU Peer Access in GPU Utilities
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=643e75dac184951c31035261a488f0eef14a9b36;p=alexxy%2Fgromacs.git

Enable GPU Peer Access in GPU Utilities

When using the new GPU communication features, enabling peer access
between pairs of GPUs (where supported) will allow peer-to-peer
communications. In this patch the CUDA code to enable peer access is
introduced into central GPU utilities and called from
Mdrunner::mdrunner() in runner.cpp.

Implements #3087

Change-Id: If668366b76d49f7b624eedb501f8af19135c4386
---

diff --git a/src/gromacs/gpu_utils/gpu_utils.cu b/src/gromacs/gpu_utils/gpu_utils.cu
index 8db9065d43..dec064397f 100644
--- a/src/gromacs/gpu_utils/gpu_utils.cu
+++ b/src/gromacs/gpu_utils/gpu_utils.cu
@@ -56,6 +56,7 @@
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/logger.h"
 #include "gromacs/utility/programcontext.h"
 #include "gromacs/utility/smalloc.h"
 #include "gromacs/utility/snprintf.h"
@@ -545,3 +546,108 @@ int gpu_info_get_stat(const gmx_gpu_info_t &info, int index)
 {
     return info.gpu_dev[index].stat;
 }
+
+/*! \brief Check status returned from peer access CUDA call, and error out or warn appropriately
+ *
+ * cudaErrorInvalidDevice and cudaErrorInvalidValue are handed to CU_RET_ERR together with a
+ * message naming the call and both GPUs. Any other non-success status is only reported as a
+ * warning through \p mdlog, after which the CUDA runtime's recorded error is cleared so that
+ * later, unrelated error checks do not pick up this already-reported, non-fatal failure.
+ *
+ * \param[in] stat           CUDA call return status
+ * \param[in] gpuA           ID for GPU initiating peer access call
+ * \param[in] gpuB           ID for remote GPU
+ * \param[in] mdlog          Logger object
+ * \param[in] cudaCallName   name of CUDA peer access call
+ */
+static void peerAccessCheckStat(const cudaError_t stat, const int gpuA, const int gpuB, const gmx::MDLogger &mdlog, const char *cudaCallName)
+{
+    if ((stat == cudaErrorInvalidDevice) || (stat == cudaErrorInvalidValue))
+    {
+        std::string errorString = gmx::formatString("%s from GPU %d to GPU %d failed", cudaCallName, gpuA, gpuB);
+        CU_RET_ERR(stat, errorString.c_str());
+    }
+    if (stat != cudaSuccess)
+    {
+        GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted("GPU peer access not enabled between GPUs %d and %d due to unexpected return value from %s: %s",
+                                                                 gpuA, gpuB, cudaCallName, cudaGetErrorString(stat));
+        // Clear the error recorded by the runtime; it has been reported above and is not fatal
+        cudaGetLastError();
+    }
+}
+
+/*! \brief Enable peer access between every ordered pair of the given GPUs, where supported
+ *
+ * For each GPU A in \p gpuIdsToUse the device is made current and, for every other entry B,
+ * cudaDeviceCanAccessPeer() is queried and cudaDeviceEnablePeerAccess() called when allowed.
+ * Pairs that were enabled successfully are listed in a single info-level note. The device
+ * that was current on entry is made current again before returning, including when the
+ * setup is abandoned because a GPU could not be selected.
+ *
+ * \param[in] gpuIdsToUse   List of GPU IDs in use
+ * \param[in] mdlog         Logger object
+ */
+void setupGpuDevicePeerAccess(const std::vector<int> &gpuIdsToUse, const gmx::MDLogger &mdlog)
+{
+    cudaError_t stat;
+
+    // take a note of currently-set GPU
+    int currentGpu;
+    stat = cudaGetDevice(&currentGpu);
+    CU_RET_ERR(stat, "cudaGetDevice in setupGpuDevicePeerAccess failed");
+
+    std::string message("Note: Peer access enabled between the following GPU pairs in the node:\n ");
+    bool        peerAccessEnabled = false;
+    bool        setupAbandoned    = false;
+
+    for (size_t i = 0; i < gpuIdsToUse.size(); i++)
+    {
+        const int gpuA = gpuIdsToUse[i];
+        stat = cudaSetDevice(gpuA);
+        if (stat != cudaSuccess)
+        {
+            GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted("GPU peer access not enabled due to unexpected return value from cudaSetDevice(%d): %s", gpuA, cudaGetErrorString(stat));
+            // Clear the recorded error and stop; the originally-set GPU is restored below
+            cudaGetLastError();
+            setupAbandoned = true;
+            break;
+        }
+        for (size_t j = 0; j < gpuIdsToUse.size(); j++)
+        {
+            if (j == i)
+            {
+                continue;
+            }
+            const int gpuB          = gpuIdsToUse[j];
+            int       canAccessPeer = 0;
+            stat = cudaDeviceCanAccessPeer(&canAccessPeer, gpuA, gpuB);
+            peerAccessCheckStat(stat, gpuA, gpuB, mdlog, "cudaDeviceCanAccessPeer");
+
+            if (canAccessPeer)
+            {
+                stat = cudaDeviceEnablePeerAccess(gpuB, 0);
+                peerAccessCheckStat(stat, gpuA, gpuB, mdlog, "cudaDeviceEnablePeerAccess");
+
+                // Only report the pair when enabling it actually succeeded
+                if (stat == cudaSuccess)
+                {
+                    message          += gmx::formatString("%d->%d ", gpuA, gpuB);
+                    peerAccessEnabled = true;
+                }
+            }
+        }
+    }
+
+    //re-set GPU to that originally set
+    stat = cudaSetDevice(currentGpu);
+    if (stat != cudaSuccess)
+    {
+        CU_RET_ERR(stat, "cudaSetDevice in setupGpuDevicePeerAccess failed");
+        return;
+    }
+
+    if (peerAccessEnabled && !setupAbandoned)
+    {
+        GMX_LOG(mdlog.info).asParagraph().appendTextFormatted("%s", message.c_str());
+    }
+}
diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h
index b0e6d86615..edf3f17486 100644
--- a/src/gromacs/gpu_utils/gpu_utils.h
+++ b/src/gromacs/gpu_utils/gpu_utils.h
@@ -58,6 +58,7 @@ struct gmx_gpu_info_t;
 
 namespace gmx
 {
+class MDLogger;
 }
 
 //! Enum which is only used to describe transfer calls at the moment
@@ -272,4 +273,12 @@ void stopGpuProfiler() CUDA_FUNC_TERM;
 CUDA_FUNC_QUALIFIER
 bool isHostMemoryPinned(const void *CUDA_FUNC_ARGUMENT(h_ptr)) CUDA_FUNC_TERM_WITH_RETURN(false);
 
+/*! \brief Enable peer access between each ordered pair of the listed GPUs, where supported
+ * \param[in] gpuIdsToUse   List of GPU IDs in use; every entry is paired with every other entry
+ * \param[in] mdlog         Logger object, used to note enabled pairs and to warn about failures
+ */
+CUDA_FUNC_QUALIFIER
+void setupGpuDevicePeerAccess(const std::vector<int>  &CUDA_FUNC_ARGUMENT(gpuIdsToUse),
+                              const gmx::MDLogger     &CUDA_FUNC_ARGUMENT(mdlog)) CUDA_FUNC_TERM;
+
 #endif
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp
index ee79084fb4..2cc4d69b08 100644
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -1243,6 +1243,15 @@ int Mdrunner::mdrunner()
                                   *hwinfo->hardwareTopology,
                                   physicalNodeComm, mdlog);
 
+    // Enable Peer access between GPUs where available
+    // Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus
+    // any of the GPU communication features are active.
+    if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI &&
+        (devFlags.enableGpuHaloExchange || devFlags.enableGpuPmePPComm))
+    {
+        setupGpuDevicePeerAccess(gpuIdsToUse, mdlog);
+    }
+
     if (hw_opt.threadAffinity != ThreadAffinity::Off)
     {
         /* Before setting affinity, check whether the affinity has changed