src/gromacs/gpu_utils/gpu_utils.cu

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2016, The GROMACS development team.
   5  * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \file
  37  *  \brief Define functions for detection and initialization for CUDA devices.
  38  *
  39  *  \author Szilard Pall <pall.szilard@gmail.com>
  40  */
  41
  42 #include "gmxpre.h"
  43
  44 #include "gpu_utils.h"
  45
  46 #include <assert.h>
  47 #include <stdio.h>
  48 #include <stdlib.h>
  49
  50 #include <cuda_profiler_api.h>
  51
  52 #include "gromacs/gpu_utils/cudautils.cuh"
  53 #include "gromacs/gpu_utils/device_context.h"
  54 #include "gromacs/gpu_utils/device_stream.h"
  55 #include "gromacs/gpu_utils/pmalloc_cuda.h"
  56 #include "gromacs/hardware/gpu_hw_info.h"
  57 #include "gromacs/utility/basedefinitions.h"
  58 #include "gromacs/utility/cstringutil.h"
  59 #include "gromacs/utility/exceptions.h"
  60 #include "gromacs/utility/fatalerror.h"
  61 #include "gromacs/utility/gmxassert.h"
  62 #include "gromacs/utility/logger.h"
  63 #include "gromacs/utility/programcontext.h"
  64 #include "gromacs/utility/smalloc.h"
  65 #include "gromacs/utility/snprintf.h"
  66 #include "gromacs/utility/stringutil.h"
  67
  68 static bool cudaProfilerRun = ((getenv("NVPROF_ID") != nullptr));
  69
  70 bool isHostMemoryPinned(const void* h_ptr)
  71 {
  72     cudaPointerAttributes memoryAttributes;
  73     cudaError_t           stat = cudaPointerGetAttributes(&memoryAttributes, h_ptr);
  74
  75     bool isPinned = false;
  76     switch (stat)
  77     {
  78         case cudaSuccess:
  79             // In CUDA 11.0, the field called memoryType in
  80             // cudaPointerAttributes was replaced by a field called
  81             // type, along with a documented change of behavior when the
  82             // pointer passed to cudaPointerGetAttributes is to
  83             // non-registered host memory. That change means that this
  84             // code needs conditional compilation and different
  85             // execution paths to function with all supported versions.
  86 #if CUDART_VERSION < 11 * 1000
  87             isPinned = true;
  88 #else
  89             isPinned = (memoryAttributes.type == cudaMemoryTypeHost);
  90 #endif
  91             break;
  92
  93         case cudaErrorInvalidValue:
  94             // If the buffer was not pinned, then it will not be recognized by CUDA at all
  95             isPinned = false;
  96             // Reset the last error status
  97             cudaGetLastError();
  98             break;
  99
 100         default: CU_RET_ERR(stat, "Unexpected CUDA error");
 101     }
 102     return isPinned;
 103 }
 104
 105 void startGpuProfiler(void)
 106 {
 107     /* The NVPROF_ID environment variable is set by nvprof and indicates that
 108        mdrun is executed in the CUDA profiler.
 109        If nvprof was run is with "--profile-from-start off", the profiler will
 110        be started here. This way we can avoid tracing the CUDA events from the
 111        first part of the run. Starting the profiler again does nothing.
 112      */
 113     if (cudaProfilerRun)
 114     {
 115         cudaError_t stat;
 116         stat = cudaProfilerStart();
 117         CU_RET_ERR(stat, "cudaProfilerStart failed");
 118     }
 119 }
 120
 121 void stopGpuProfiler(void)
 122 {
 123     /* Stopping the nvidia here allows us to eliminate the subsequent
 124        API calls from the trace, e.g. uninitialization and cleanup. */
 125     if (cudaProfilerRun)
 126     {
 127         cudaError_t stat;
 128         stat = cudaProfilerStop();
 129         CU_RET_ERR(stat, "cudaProfilerStop failed");
 130     }
 131 }
 132
 133 void resetGpuProfiler(void)
 134 {
 135     /* With CUDA <=7.5 the profiler can't be properly reset; we can only start
 136      *  the profiling here (can't stop it) which will achieve the desired effect if
 137      *  the run was started with the profiling disabled.
 138      *
 139      * TODO: add a stop (or replace it with reset) when this will work correctly in CUDA.
 140      * stopGpuProfiler();
 141      */
 142     if (cudaProfilerRun)
 143     {
 144         startGpuProfiler();
 145     }
 146 }
 147
 148 /*! \brief Check status returned from peer access CUDA call, and error out or warn appropriately
 149  * \param[in] stat           CUDA call return status
 150  * \param[in] gpuA           ID for GPU initiating peer access call
 151  * \param[in] gpuB           ID for remote GPU
 152  * \param[in] mdlog          Logger object
 153  * \param[in] cudaCallName   name of CUDA peer access call
 154  */
 155 static void peerAccessCheckStat(const cudaError_t    stat,
 156                                 const int            gpuA,
 157                                 const int            gpuB,
 158                                 const gmx::MDLogger& mdlog,
 159                                 const char*          cudaCallName)
 160 {
 161     if ((stat == cudaErrorInvalidDevice) || (stat == cudaErrorInvalidValue))
 162     {
 163         std::string errorString =
 164                 gmx::formatString("%s from GPU %d to GPU %d failed", cudaCallName, gpuA, gpuB);
 165         CU_RET_ERR(stat, errorString.c_str());
 166     }
 167     if (stat != cudaSuccess)
 168     {
 169         GMX_LOG(mdlog.warning)
 170                 .asParagraph()
 171                 .appendTextFormatted(
 172                         "GPU peer access not enabled between GPUs %d and %d due to unexpected "
 173                         "return value from %s: %s",
 174                         gpuA, gpuB, cudaCallName, cudaGetErrorString(stat));
 175     }
 176 }
 177
 178 void setupGpuDevicePeerAccess(const std::vector<int>& gpuIdsToUse, const gmx::MDLogger& mdlog)
 179 {
 180     cudaError_t stat;
 181
 182     // take a note of currently-set GPU
 183     int currentGpu;
 184     stat = cudaGetDevice(&currentGpu);
 185     CU_RET_ERR(stat, "cudaGetDevice in setupGpuDevicePeerAccess failed");
 186
 187     std::string message = gmx::formatString(
 188             "Note: Peer access enabled between the following GPU pairs in the node:\n ");
 189     bool peerAccessEnabled = false;
 190
 191     for (unsigned int i = 0; i < gpuIdsToUse.size(); i++)
 192     {
 193         int gpuA = gpuIdsToUse[i];
 194         stat     = cudaSetDevice(gpuA);
 195         if (stat != cudaSuccess)
 196         {
 197             GMX_LOG(mdlog.warning)
 198                     .asParagraph()
 199                     .appendTextFormatted(
 200                             "GPU peer access not enabled due to unexpected return value from "
 201                             "cudaSetDevice(%d): %s",
 202                             gpuA, cudaGetErrorString(stat));
 203             return;
 204         }
 205         for (unsigned int j = 0; j < gpuIdsToUse.size(); j++)
 206         {
 207             if (j != i)
 208             {
 209                 int gpuB          = gpuIdsToUse[j];
 210                 int canAccessPeer = 0;
 211                 stat              = cudaDeviceCanAccessPeer(&canAccessPeer, gpuA, gpuB);
 212                 peerAccessCheckStat(stat, gpuA, gpuB, mdlog, "cudaDeviceCanAccessPeer");
 213
 214                 if (canAccessPeer)
 215                 {
 216                     stat = cudaDeviceEnablePeerAccess(gpuB, 0);
 217                     peerAccessCheckStat(stat, gpuA, gpuB, mdlog, "cudaDeviceEnablePeerAccess");
 218
 219                     message           = gmx::formatString("%s%d->%d ", message.c_str(), gpuA, gpuB);
 220                     peerAccessEnabled = true;
 221                 }
 222             }
 223         }
 224     }
 225
 226     // re-set GPU to that originally set
 227     stat = cudaSetDevice(currentGpu);
 228     if (stat != cudaSuccess)
 229     {
 230         CU_RET_ERR(stat, "cudaSetDevice in setupGpuDevicePeerAccess failed");
 231         return;
 232     }
 233
 234     if (peerAccessEnabled)
 235     {
 236         GMX_LOG(mdlog.info).asParagraph().appendTextFormatted("%s", message.c_str());
 237     }
 238 }