/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 * \brief
 * Defines routine for collecting all GPU tasks found on ranks of a node.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 * \ingroup module_taskassignment
 */
#include "findallgputasks.h"

#include <numeric>
#include <vector>

#include "gromacs/taskassignment/decidegpuusage.h"
#include "gromacs/taskassignment/taskassignment.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/physicalnodecommunicator.h"
63 std::vector<GpuTask> findGpuTasksOnThisRank(const bool haveGpusOnThisPhysicalNode,
64 const TaskTarget nonbondedTarget,
65 const TaskTarget pmeTarget,
66 const TaskTarget bondedTarget,
67 const TaskTarget updateTarget,
68 const bool useGpuForNonbonded,
69 const bool useGpuForPme,
70 const bool rankHasPpTask,
71 const bool rankHasPmeTask)
73 std::vector<GpuTask> gpuTasksOnThisRank;
76 if (useGpuForNonbonded)
78 // Note that any bonded tasks on a GPU always accompany a
80 if (haveGpusOnThisPhysicalNode)
82 gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
84 else if (nonbondedTarget == TaskTarget::Gpu)
87 "Cannot run short-ranged nonbonded interactions on a GPU because no GPU "
90 else if (bondedTarget == TaskTarget::Gpu)
93 "Cannot run bonded interactions on a GPU because no GPU is detected.");
95 else if (updateTarget == TaskTarget::Gpu)
98 "Cannot run coordinate update on a GPU because no GPU is detected.");
106 if (haveGpusOnThisPhysicalNode)
108 gpuTasksOnThisRank.push_back(GpuTask::Pme);
110 else if (pmeTarget == TaskTarget::Gpu)
112 gmx_fatal(FARGS, "Cannot run PME on a GPU because no GPU is detected.");
116 return gpuTasksOnThisRank;
122 //! Constant used to help minimize preprocessing of code.
123 constexpr bool g_usingMpi = GMX_MPI;
125 //! Helper function to prepare to all-gather the vector of non-bonded tasks on this node.
126 std::vector<int> allgather(const int& input, int numRanks, MPI_Comm communicator)
128 std::vector<int> result(numRanks);
129 if (g_usingMpi && numRanks > 1)
131 // TODO This works as an MPI_Allgather, but thread-MPI does
132 // not implement that. It's only intra-node communication, and
133 // happens rarely, so not worth optimizing (yet). Also
134 // thread-MPI segfaults with 1 rank.
137 // Calling a C API with the const T * from data() doesn't seem
138 // to compile warning-free with all versions of MPI headers.
140 // TODO Make an allgather template to deal with this nonsense.
141 MPI_Gather(const_cast<int*>(&input), 1, MPI_INT, const_cast<int*>(result.data()), 1, MPI_INT, root, communicator);
142 MPI_Bcast(const_cast<int*>(result.data()), result.size(), MPI_INT, root, communicator);
144 GMX_UNUSED_VALUE(communicator);
155 //! Helper function to compute allgatherv displacements.
156 std::vector<int> computeDisplacements(ArrayRef<const int> extentOnEachRank, int numRanks)
158 std::vector<int> displacements(numRanks + 1);
159 displacements[0] = 0;
161 std::begin(extentOnEachRank), std::end(extentOnEachRank), std::begin(displacements) + 1);
162 return displacements;
165 //! Helper function to all-gather the vector of all GPU tasks on ranks of this node.
166 std::vector<GpuTask> allgatherv(ArrayRef<const GpuTask> input,
167 ArrayRef<const int> extentOnEachRank,
168 ArrayRef<const int> displacementForEachRank,
169 MPI_Comm communicator)
171 // Now allocate the vector and do the allgatherv
172 int totalExtent = displacementForEachRank.back();
174 std::vector<GpuTask> result;
175 result.reserve(totalExtent);
176 if (g_usingMpi && extentOnEachRank.size() > 1 && totalExtent > 0)
178 result.resize(totalExtent);
179 // TODO This works as an MPI_Allgatherv, but thread-MPI does
180 // not implement that. It's only intra-node communication, and
181 // happens rarely, so not worth optimizing (yet). Also
182 // thread-MPI segfaults with 1 rank and with zero totalExtent.
185 // Calling a C API with the const T * from data() doesn't seem to compile reliably.
186 // TODO Make an allgatherv template to deal with this nonsense.
187 MPI_Gatherv(const_cast<GpuTask*>(input.data()),
190 const_cast<GpuTask*>(result.data()),
191 const_cast<int*>(extentOnEachRank.data()),
192 const_cast<int*>(displacementForEachRank.data()),
196 MPI_Bcast(const_cast<GpuTask*>(result.data()), result.size(), MPI_INT, root, communicator);
198 GMX_UNUSED_VALUE(communicator);
203 for (const auto& gpuTask : input)
205 result.push_back(gpuTask);
213 /*! \brief Returns container of all tasks on all ranks of this node
214 * that are eligible for GPU execution.
216 * Perform all necessary communication for preparing for task
217 * assignment. Separating this aspect makes it possible to unit test
218 * the logic of task assignment. */
219 GpuTasksOnRanks findAllGpuTasksOnThisNode(ArrayRef<const GpuTask> gpuTasksOnThisRank,
220 const PhysicalNodeCommunicator& physicalNodeComm)
222 int numRanksOnThisNode = physicalNodeComm.size_;
223 MPI_Comm communicator = physicalNodeComm.comm_;
224 // Find out how many GPU tasks are on each rank on this node.
225 auto numGpuTasksOnEachRankOfThisNode =
226 allgather(gpuTasksOnThisRank.size(), numRanksOnThisNode, communicator);
228 /* Collect on each rank of this node a vector describing all
229 * GPU tasks on this node, in ascending order of rank. This
230 * requires a vector allgather. The displacements indicate where
231 * the GPU tasks on each rank of this node start and end within
233 auto displacementsForEachRank =
234 computeDisplacements(numGpuTasksOnEachRankOfThisNode, numRanksOnThisNode);
235 auto gpuTasksOnThisNode = allgatherv(
236 gpuTasksOnThisRank, numGpuTasksOnEachRankOfThisNode, displacementsForEachRank, communicator);
238 /* Next, we re-use the displacements to break up the vector
239 * of GPU tasks into something that can be indexed like
240 * gpuTasks[rankIndex][taskIndex]. */
241 GpuTasksOnRanks gpuTasksOnRanksOfThisNode;
242 // TODO This would be nicer if we had a good abstraction for "pair
243 // of iterators that point to adjacent container elements" or
244 // "iterator that points to the first of a pair of valid adjacent
245 // container elements, or end".
246 GMX_ASSERT(displacementsForEachRank.size() > 1,
247 "Even with one rank, there's always both a start and end displacement");
248 auto currentDisplacementIt = displacementsForEachRank.begin();
249 auto nextDisplacementIt = currentDisplacementIt + 1;
252 gpuTasksOnRanksOfThisNode.emplace_back(std::vector<GpuTask>());
253 for (auto taskOnThisRankIndex = *currentDisplacementIt; taskOnThisRankIndex != *nextDisplacementIt;
254 ++taskOnThisRankIndex)
256 gpuTasksOnRanksOfThisNode.back().push_back(gpuTasksOnThisNode[taskOnThisRankIndex]);
259 currentDisplacementIt = nextDisplacementIt;
260 ++nextDisplacementIt;
261 } while (nextDisplacementIt != displacementsForEachRank.end());
263 return gpuTasksOnRanksOfThisNode;