2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014,2015,2016, The GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Define functions for detection and initialization for CUDA devices.
39 * \author Szilard Pall <pall.szilard@gmail.com>
44 #include "gpu_utils.h"
50 #include <cuda_profiler_api.h>
52 #include "gromacs/gpu_utils/cudautils.cuh"
53 #include "gromacs/gpu_utils/device_context.h"
54 #include "gromacs/gpu_utils/device_stream.h"
55 #include "gromacs/hardware/device_information.h"
56 #include "gromacs/hardware/device_management.h"
57 #include "gromacs/utility/basedefinitions.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/exceptions.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/gmxassert.h"
62 #include "gromacs/utility/logger.h"
63 #include "gromacs/utility/programcontext.h"
64 #include "gromacs/utility/smalloc.h"
65 #include "gromacs/utility/snprintf.h"
66 #include "gromacs/utility/stringutil.h"
// File-scope flag, evaluated once during static initialization: true when the
// NVPROF_ID environment variable is present in the process environment.
68 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
69 static bool cudaProfilerRun = ((getenv("NVPROF_ID") != nullptr));
// Determines whether the host buffer at h_ptr is pinned (page-locked) memory known
// to CUDA, using the attributes reported by cudaPointerGetAttributes().
// h_ptr is the host pointer to query; it is only inspected, never dereferenced here.
71 bool isHostMemoryPinned(const void* h_ptr)
73 cudaPointerAttributes memoryAttributes;
74 cudaError_t stat = cudaPointerGetAttributes(&memoryAttributes, h_ptr);
// Assume "not pinned" until the attribute query proves otherwise.
76 bool isPinned = false;
80 // In CUDA 11.0, the field called memoryType in
81 // cudaPointerAttributes was replaced by a field called
82 // type, along with a documented change of behavior when the
83 // pointer passed to cudaPointerGetAttributes is to
84 // non-registered host memory. That change means that this
85 // code needs conditional compilation and different
86 // execution paths to function with all supported versions.
87 #if CUDART_VERSION < 11 * 1000
// This form reads the `type` field, i.e. the field name that the note above
// describes as the CUDA 11.0 replacement for `memoryType`.
90 isPinned = (memoryAttributes.type == cudaMemoryTypeHost);
94 case cudaErrorInvalidValue:
95 // If the buffer was not pinned, then it will not be recognized by CUDA at all
97 // Reset the last error status
// Every other status is unexpected and is reported through CU_RET_ERR.
101 default: CU_RET_ERR(stat, "Unexpected CUDA error");
// Starts CUDA profiler data collection with cudaProfilerStart(); a failing
// status is reported through CU_RET_ERR.
106 void startGpuProfiler()
108 /* The NVPROF_ID environment variable is set by nvprof and indicates that
109 mdrun is executed in the CUDA profiler.
110 If nvprof was run with "--profile-from-start off", the profiler will
111 be started here. This way we can avoid tracing the CUDA events from the
112 first part of the run. Starting the profiler again does nothing.
117 stat = cudaProfilerStart();
118 CU_RET_ERR(stat, "cudaProfilerStart failed");
// Stops CUDA profiler data collection with cudaProfilerStop(); a failing
// status is reported through CU_RET_ERR.
122 void stopGpuProfiler()
124 /* Stopping the nvidia profiler here allows us to eliminate the subsequent
125 API calls from the trace, e.g. uninitialization and cleanup. */
129 stat = cudaProfilerStop();
130 CU_RET_ERR(stat, "cudaProfilerStop failed");
// Resets GPU profiling. NOTE(review): according to the explanation below, the
// profiler cannot be truly reset, so this presumably only (re)starts profiling.
134 void resetGpuProfiler()
136 /* With CUDA <=7.5 the profiler can't be properly reset; we can only start
137 * the profiling here (can't stop it) which will achieve the desired effect if
138 * the run was started with the profiling disabled.
140 * TODO: add a stop (or replace it with reset) when this will work correctly in CUDA.
149 /*! \brief Check and act on status returned from peer access CUDA call
151 * If status is "cudaSuccess", we continue. If
152 * "cudaErrorPeerAccessAlreadyEnabled", then peer access has already
153 * been enabled so we ignore. If "cudaErrorInvalidDevice" then the
154 * run is trying to access an invalid GPU, so we throw an error. If
155 * "cudaErrorInvalidValue" then there is a problem with the arguments
156 * to the CUDA call, and we throw an error. These cover all expected
157 * statuses, but if any other is returned we issue a warning and
160 * \param[in] stat CUDA call return status
161 * \param[in] gpuA ID for GPU initiating peer access call
162 * \param[in] gpuB ID for remote GPU
163 * \param[in] mdlog Logger object
164 * \param[in] cudaCallName name of CUDA peer access call
166 static void peerAccessCheckStat(const cudaError_t stat,
169 const gmx::MDLogger& mdlog,
170 const char* cudaCallName)
// Case 1: benign status, peer access was already on for this pair.
173 if (stat == cudaErrorPeerAccessAlreadyEnabled)
175 // Since peer access has already been enabled, this error can safely be ignored.
176 // Now clear the error internally within CUDA:
// Case 2: invalid device or invalid argument. The failure is reported through
// CU_RET_ERR with a message naming the CUDA call and both GPU IDs.
180 if ((stat == cudaErrorInvalidDevice) || (stat == cudaErrorInvalidValue))
182 std::string errorString =
183 gmx::formatString("%s from GPU %d to GPU %d failed", cudaCallName, gpuA, gpuB);
184 CU_RET_ERR(stat, errorString);
// Case 3: any other non-success status is written to the warning log together
// with the device error string for that status.
186 if (stat != cudaSuccess)
188 GMX_LOG(mdlog.warning)
190 .appendTextFormatted(
191 "GPU peer access not enabled between GPUs %d and %d due to unexpected "
192 "return value from %s. %s",
196 gmx::getDeviceErrorString(stat).c_str());
197 // Clear the error internally within CUDA
// Attempts to enable CUDA peer access among the devices listed in gpuIdsToUse.
//   gpuIdsToUse  device IDs to consider, used both as initiating and as remote devices
//   mdlog        logger that receives warnings and the summary of enabled pairs
// The device that is current on entry is recorded first and selected again at the end.
202 void setupGpuDevicePeerAccess(const std::vector<int>& gpuIdsToUse, const gmx::MDLogger& mdlog)
206 // take a note of currently-set GPU
208 stat = cudaGetDevice(&currentGpu);
209 CU_RET_ERR(stat, "cudaGetDevice in setupGpuDevicePeerAccess failed");
// Summary text; each enabled pair is appended to it as "A->B".
211 std::string message = gmx::formatString(
212 "Note: Peer access enabled between the following GPU pairs in the node:\n ");
213 bool peerAccessEnabled = false;
// Outer loop: each listed device in turn is made current and acts as the initiating GPU.
215 for (unsigned int i = 0; i < gpuIdsToUse.size(); i++)
217 int gpuA = gpuIdsToUse[i];
218 stat = cudaSetDevice(gpuA);
// Failure to select the device is logged as a warning, not raised as an error.
219 if (stat != cudaSuccess)
221 GMX_LOG(mdlog.warning)
223 .appendTextFormatted(
224 "GPU peer access not enabled due to unexpected return value from "
225 "cudaSetDevice(%d). %s",
227 gmx::getDeviceErrorString(stat).c_str());
// Inner loop: candidate remote devices for the current initiating GPU.
230 for (unsigned int j = 0; j < gpuIdsToUse.size(); j++)
234 int gpuB = gpuIdsToUse[j];
235 int canAccessPeer = 0;
// Ask the runtime whether gpuA can reach gpuB; the status is vetted by peerAccessCheckStat().
236 stat = cudaDeviceCanAccessPeer(&canAccessPeer, gpuA, gpuB);
237 peerAccessCheckStat(stat, gpuA, gpuB, mdlog, "cudaDeviceCanAccessPeer");
// Enable access from the current device to gpuB; the second argument is the flags value, passed as 0.
241 stat = cudaDeviceEnablePeerAccess(gpuB, 0);
242 peerAccessCheckStat(stat, gpuA, gpuB, mdlog, "cudaDeviceEnablePeerAccess");
// Record the pair so that it appears in the summary logged at the end.
244 message = gmx::formatString("%s%d->%d ", message.c_str(), gpuA, gpuB);
245 peerAccessEnabled = true;
251 // re-set GPU to that originally set
252 stat = cudaSetDevice(currentGpu);
253 if (stat != cudaSuccess)
255 CU_RET_ERR(stat, "cudaSetDevice in setupGpuDevicePeerAccess failed");
// The summary is only written when at least one pair was enabled.
259 if (peerAccessEnabled)
261 GMX_LOG(mdlog.info).asParagraph().appendTextFormatted("%s", message.c_str());