2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Defines routine for collecting all GPU tasks found on ranks of a node.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
44 #include "findallgputasks.h"
51 #include "gromacs/utility/exceptions.h"
52 #include "gromacs/utility/gmxassert.h"
53 #include "gromacs/utility/gmxmpi.h"
54 #include "gromacs/utility/physicalnodecommunicator.h"
62 //! Constant used to help minimize preprocessing of code.
63 constexpr bool g_usingMpi = GMX_MPI;
65 //! Helper function to prepare to all-gather the vector of non-bonded tasks on this node.
66 std::vector<int> allgather(const int &input,
68 MPI_Comm communicator)
70 std::vector<int> result(numRanks);
71 if (g_usingMpi && numRanks > 1)
73 // TODO This works as an MPI_Allgather, but thread-MPI does
74 // not implement that. It's only intra-node communication, and
75 // happens rarely, so not worth optimizing (yet). Also
76 // thread-MPI segfaults with 1 rank.
79 // Calling a C API with the const T * from data() doesn't seem
80 // to compile warning-free with all versions of MPI headers.
82 // TODO Make an allgather template to deal with this nonsense.
83 MPI_Gather(const_cast<int *>(&input),
86 const_cast<int *>(result.data()),
91 MPI_Bcast(const_cast<int *>(result.data()),
97 GMX_UNUSED_VALUE(communicator);
108 //! Helper function to compute allgatherv displacements.
109 std::vector<int> computeDisplacements(ArrayRef<const int> extentOnEachRank,
112 std::vector<int> displacements(numRanks + 1);
113 displacements[0] = 0;
114 std::partial_sum(std::begin(extentOnEachRank), std::end(extentOnEachRank), std::begin(displacements) + 1);
115 return displacements;
118 //! Helper function to all-gather the vector of all GPU tasks on ranks of this node.
119 std::vector<GpuTask> allgatherv(ArrayRef<const GpuTask> input,
120 ArrayRef<const int> extentOnEachRank,
121 ArrayRef<const int> displacementForEachRank,
122 MPI_Comm communicator)
124 // Now allocate the vector and do the allgatherv
125 int totalExtent = displacementForEachRank.back();
127 std::vector<GpuTask> result;
128 result.reserve(totalExtent);
129 if (g_usingMpi && extentOnEachRank.size() > 1 && totalExtent > 0)
131 result.resize(totalExtent);
132 // TODO This works as an MPI_Allgatherv, but thread-MPI does
133 // not implement that. It's only intra-node communication, and
134 // happens rarely, so not worth optimizing (yet). Also
135 // thread-MPI segfaults with 1 rank and with zero totalExtent.
138 // Calling a C API with the const T * from data() doesn't seem to compile reliably.
139 // TODO Make an allgatherv template to deal with this nonsense.
140 MPI_Gatherv(const_cast<GpuTask *>(input.data()),
143 const_cast<GpuTask *>(result.data()),
144 const_cast<int *>(extentOnEachRank.data()),
145 const_cast<int *>(displacementForEachRank.data()),
149 MPI_Bcast(const_cast<GpuTask *>(result.data()),
155 GMX_UNUSED_VALUE(communicator);
160 for (const auto &gpuTask : input)
162 result.push_back(gpuTask);
170 /*! \brief Returns container of all tasks on all ranks of this node
171 * that are eligible for GPU execution.
173 * Perform all necessary communication for preparing for task
174 * assignment. Separating this aspect makes it possible to unit test
175 * the logic of task assignment. */
177 findAllGpuTasksOnThisNode(ArrayRef<const GpuTask> gpuTasksOnThisRank,
178 const PhysicalNodeCommunicator &physicalNodeComm)
180 int numRanksOnThisNode = physicalNodeComm.size_;
181 MPI_Comm communicator = physicalNodeComm.comm_;
182 // Find out how many GPU tasks are on each rank on this node.
183 auto numGpuTasksOnEachRankOfThisNode =
184 allgather(gpuTasksOnThisRank.size(), numRanksOnThisNode, communicator);
186 /* Collect on each rank of this node a vector describing all
187 * GPU tasks on this node, in ascending order of rank. This
188 * requires a vector allgather. The displacements indicate where
189 * the GPU tasks on each rank of this node start and end within
191 auto displacementsForEachRank = computeDisplacements(numGpuTasksOnEachRankOfThisNode, numRanksOnThisNode);
192 auto gpuTasksOnThisNode = allgatherv(gpuTasksOnThisRank, numGpuTasksOnEachRankOfThisNode,
193 displacementsForEachRank, communicator);
195 /* Next, we re-use the displacements to break up the vector
196 * of GPU tasks into something that can be indexed like
197 * gpuTasks[rankIndex][taskIndex]. */
198 GpuTasksOnRanks gpuTasksOnRanksOfThisNode;
199 // TODO This would be nicer if we had a good abstraction for "pair
200 // of iterators that point to adjacent container elements" or
201 // "iterator that points to the first of a pair of valid adjacent
202 // container elements, or end".
203 GMX_ASSERT(displacementsForEachRank.size() > 1, "Even with one rank, there's always both a start and end displacement");
204 auto currentDisplacementIt = displacementsForEachRank.begin();
205 auto nextDisplacementIt = currentDisplacementIt + 1;
208 gpuTasksOnRanksOfThisNode.emplace_back(std::vector<GpuTask>());
209 for (auto taskOnThisRankIndex = *currentDisplacementIt; taskOnThisRankIndex != *nextDisplacementIt; ++taskOnThisRankIndex)
211 gpuTasksOnRanksOfThisNode.back().push_back(gpuTasksOnThisNode[taskOnThisRankIndex]);
214 currentDisplacementIt = nextDisplacementIt;
215 ++nextDisplacementIt;
217 while (nextDisplacementIt != displacementsForEachRank.end());
219 return gpuTasksOnRanksOfThisNode;