src/gromacs/gpu_utils/gpu_utils.cu

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2016, The GROMACS development team.
   5  * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \file
  37  *  \brief Define functions for detection and initialization for CUDA devices.
  38  *
  39  *  \author Szilard Pall <pall.szilard@gmail.com>
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "gpu_utils.h"
  45
  46 #include <assert.h>
  47 #include <stdio.h>
  48 #include <stdlib.h>
  49
  50 #include <cuda_profiler_api.h>
  51
  52 #include "gromacs/gpu_utils/cudautils.cuh"
  53 #include "gromacs/gpu_utils/device_context.h"
  54 #include "gromacs/gpu_utils/device_stream.h"
  55 #include "gromacs/hardware/device_information.h"
  56 #include "gromacs/hardware/device_management.h"
  57 #include "gromacs/utility/basedefinitions.h"
  58 #include "gromacs/utility/cstringutil.h"
  59 #include "gromacs/utility/exceptions.h"
  60 #include "gromacs/utility/fatalerror.h"
  61 #include "gromacs/utility/gmxassert.h"
  62 #include "gromacs/utility/logger.h"
  63 #include "gromacs/utility/programcontext.h"
  64 #include "gromacs/utility/smalloc.h"
  65 #include "gromacs/utility/snprintf.h"
  66 #include "gromacs/utility/stringutil.h"
  67
  68 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
  69 static bool cudaProfilerRun = ((getenv("NVPROF_ID") != nullptr));
  70
  71 bool isHostMemoryPinned(const void* h_ptr)
  72 {
  73     cudaPointerAttributes memoryAttributes;
  74     cudaError_t           stat = cudaPointerGetAttributes(&memoryAttributes, h_ptr);
  75
  76     bool isPinned = false;
  77     switch (stat)
  78     {
  79         case cudaSuccess:
  80             // In CUDA 11.0, the field called memoryType in
  81             // cudaPointerAttributes was replaced by a field called
  82             // type, along with a documented change of behavior when the
  83             // pointer passed to cudaPointerGetAttributes is to
  84             // non-registered host memory. That change means that this
  85             // code needs conditional compilation and different
  86             // execution paths to function with all supported versions.
  87 #if CUDART_VERSION < 11 * 1000
  88             isPinned = true;
  89 #else
  90             isPinned = (memoryAttributes.type == cudaMemoryTypeHost);
  91 #endif
  92             break;
  93
  94         case cudaErrorInvalidValue:
  95             // If the buffer was not pinned, then it will not be recognized by CUDA at all
  96             isPinned = false;
  97             // Reset the last error status
  98             cudaGetLastError();
  99             break;
 100
 101         default: CU_RET_ERR(stat, "Unexpected CUDA error");
 102     }
 103     return isPinned;
 104 }
 105
 106 void startGpuProfiler()
 107 {
 108     /* The NVPROF_ID environment variable is set by nvprof and indicates that
 109        mdrun is executed in the CUDA profiler.
 110        If nvprof was run is with "--profile-from-start off", the profiler will
 111        be started here. This way we can avoid tracing the CUDA events from the
 112        first part of the run. Starting the profiler again does nothing.
 113      */
 114     if (cudaProfilerRun)
 115     {
 116         cudaError_t stat;
 117         stat = cudaProfilerStart();
 118         CU_RET_ERR(stat, "cudaProfilerStart failed");
 119     }
 120 }
 121
 122 void stopGpuProfiler()
 123 {
 124     /* Stopping the nvidia here allows us to eliminate the subsequent
 125        API calls from the trace, e.g. uninitialization and cleanup. */
 126     if (cudaProfilerRun)
 127     {
 128         cudaError_t stat;
 129         stat = cudaProfilerStop();
 130         CU_RET_ERR(stat, "cudaProfilerStop failed");
 131     }
 132 }
 133
 134 void resetGpuProfiler()
 135 {
 136     /* With CUDA <=7.5 the profiler can't be properly reset; we can only start
 137      *  the profiling here (can't stop it) which will achieve the desired effect if
 138      *  the run was started with the profiling disabled.
 139      *
 140      * TODO: add a stop (or replace it with reset) when this will work correctly in CUDA.
 141      * stopGpuProfiler();
 142      */
 143     if (cudaProfilerRun)
 144     {
 145         startGpuProfiler();
 146     }
 147 }
 148
 149 /*! \brief Check and act on status returned from peer access CUDA call
 150  *
 151  * If status is "cudaSuccess", we continue. If
 152  * "cudaErrorPeerAccessAlreadyEnabled", then peer access has already
 153  * been enabled so we ignore. If "cudaErrorInvalidDevice" then the
 154  * run is trying to access an invalid GPU, so we throw an error. If
 155  * "cudaErrorInvalidValue" then there is a problem with the arguments
 156  * to the CUDA call, and we throw an error. These cover all expected
 157  * statuses, but if any other is returned we issue a warning and
 158  * continue.
 159  *
 160  * \param[in] stat           CUDA call return status
 161  * \param[in] gpuA           ID for GPU initiating peer access call
 162  * \param[in] gpuB           ID for remote GPU
 163  * \param[in] mdlog          Logger object
 164  * \param[in] cudaCallName   name of CUDA peer access call
 165  */
 166 static void peerAccessCheckStat(const cudaError_t    stat,
 167                                 const int            gpuA,
 168                                 const int            gpuB,
 169                                 const gmx::MDLogger& mdlog,
 170                                 const char*          cudaCallName)
 171 {
 172
 173     if (stat == cudaErrorPeerAccessAlreadyEnabled)
 174     {
 175         // Since peer access has already been enabled, this error can safely be ignored.
 176         // Now clear the error internally within CUDA:
 177         cudaGetLastError();
 178         return;
 179     }
 180     if ((stat == cudaErrorInvalidDevice) || (stat == cudaErrorInvalidValue))
 181     {
 182         std::string errorString =
 183                 gmx::formatString("%s from GPU %d to GPU %d failed", cudaCallName, gpuA, gpuB);
 184         CU_RET_ERR(stat, errorString);
 185     }
 186     if (stat != cudaSuccess)
 187     {
 188         GMX_LOG(mdlog.warning)
 189                 .asParagraph()
 190                 .appendTextFormatted(
 191                         "GPU peer access not enabled between GPUs %d and %d due to unexpected "
 192                         "return value from %s. %s",
 193                         gpuA,
 194                         gpuB,
 195                         cudaCallName,
 196                         gmx::getDeviceErrorString(stat).c_str());
 197         // Clear the error internally within CUDA
 198         cudaGetLastError();
 199     }
 200 }
 201
 202 void setupGpuDevicePeerAccess(const std::vector<int>& gpuIdsToUse, const gmx::MDLogger& mdlog)
 203 {
 204     cudaError_t stat;
 205
 206     // take a note of currently-set GPU
 207     int currentGpu;
 208     stat = cudaGetDevice(&currentGpu);
 209     CU_RET_ERR(stat, "cudaGetDevice in setupGpuDevicePeerAccess failed");
 210
 211     std::string message = gmx::formatString(
 212             "Note: Peer access enabled between the following GPU pairs in the node:\n ");
 213     bool peerAccessEnabled = false;
 214
 215     for (unsigned int i = 0; i < gpuIdsToUse.size(); i++)
 216     {
 217         int gpuA = gpuIdsToUse[i];
 218         stat     = cudaSetDevice(gpuA);
 219         if (stat != cudaSuccess)
 220         {
 221             GMX_LOG(mdlog.warning)
 222                     .asParagraph()
 223                     .appendTextFormatted(
 224                             "GPU peer access not enabled due to unexpected return value from "
 225                             "cudaSetDevice(%d). %s",
 226                             gpuA,
 227                             gmx::getDeviceErrorString(stat).c_str());
 228             return;
 229         }
 230         for (unsigned int j = 0; j < gpuIdsToUse.size(); j++)
 231         {
 232             if (j != i)
 233             {
 234                 int gpuB          = gpuIdsToUse[j];
 235                 int canAccessPeer = 0;
 236                 stat              = cudaDeviceCanAccessPeer(&canAccessPeer, gpuA, gpuB);
 237                 peerAccessCheckStat(stat, gpuA, gpuB, mdlog, "cudaDeviceCanAccessPeer");
 238
 239                 if (canAccessPeer)
 240                 {
 241                     stat = cudaDeviceEnablePeerAccess(gpuB, 0);
 242                     peerAccessCheckStat(stat, gpuA, gpuB, mdlog, "cudaDeviceEnablePeerAccess");
 243
 244                     message           = gmx::formatString("%s%d->%d ", message.c_str(), gpuA, gpuB);
 245                     peerAccessEnabled = true;
 246                 }
 247             }
 248         }
 249     }
 250
 251     // re-set GPU to that originally set
 252     stat = cudaSetDevice(currentGpu);
 253     if (stat != cudaSuccess)
 254     {
 255         CU_RET_ERR(stat, "cudaSetDevice in setupGpuDevicePeerAccess failed");
 256         return;
 257     }
 258
 259     if (peerAccessEnabled)
 260     {
 261         GMX_LOG(mdlog.info).asParagraph().appendTextFormatted("%s", message.c_str());
 262     }
 263 }