2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Implements the multi-simulation support routines.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_mdrunutility
48 #include "gromacs/mdtypes/commrec.h"
49 #include "gromacs/utility/exceptions.h"
50 #include "gromacs/utility/fatalerror.h"
51 #include "gromacs/utility/futil.h"
52 #include "gromacs/utility/gmxassert.h"
53 #include "gromacs/utility/smalloc.h"
55 std::unique_ptr<gmx_multisim_t> buildMultiSimulation(MPI_Comm worldComm,
56 gmx::ArrayRef<const std::string> multidirs)
58 if (multidirs.empty())
63 if (!GMX_LIB_MPI && !multidirs.empty())
65 GMX_THROW(gmx::NotImplementedError(
66 "Multi-simulations are only supported when GROMACS has been "
67 "configured with a proper external MPI library."));
70 if (multidirs.size() == 1)
72 /* NOTE: It would be nice if this special case worked, but this requires checks/tests. */
73 GMX_THROW(gmx::NotImplementedError(
74 "To run mdrun in multi-simulation mode, more then one "
75 "actual simulation is required. The single simulation case is not supported."));
80 MPI_Comm_size(worldComm, &numRanks);
81 if (numRanks % multidirs.size() != 0)
83 auto message = gmx::formatString(
84 "The number of ranks (%d) is not a multiple of the number of simulations (%td)",
85 numRanks, multidirs.ssize());
86 GMX_THROW(gmx::InconsistentInputError(message));
89 int numRanksPerSimulation = numRanks / multidirs.size();
90 int rankWithinWorldComm;
91 MPI_Comm_rank(worldComm, &rankWithinWorldComm);
95 fprintf(debug, "We have %td simulations, %d ranks per simulation, local simulation is %d\n",
96 multidirs.ssize(), numRanksPerSimulation, rankWithinWorldComm / numRanksPerSimulation);
99 int numSimulations = multidirs.size();
100 // Create a communicator for the master ranks of each simulation
101 std::vector<int> ranksOfMasters(numSimulations);
102 for (int i = 0; i < numSimulations; i++)
104 ranksOfMasters[i] = i * numRanksPerSimulation;
106 MPI_Group worldGroup;
107 // No need to free worldGroup later, we didn't create it.
108 MPI_Comm_group(worldComm, &worldGroup);
110 MPI_Group mastersGroup = MPI_GROUP_NULL;
111 MPI_Group_incl(worldGroup, numSimulations, ranksOfMasters.data(), &mastersGroup);
112 MPI_Comm mastersComm = MPI_COMM_NULL;
113 MPI_Comm_create(worldComm, mastersGroup, &mastersComm);
114 if (mastersGroup != MPI_GROUP_NULL)
116 MPI_Group_free(&mastersGroup);
119 int simulationIndex = rankWithinWorldComm / numRanksPerSimulation;
120 MPI_Comm simulationComm = MPI_COMM_NULL;
121 MPI_Comm_split(worldComm, simulationIndex, rankWithinWorldComm, &simulationComm);
125 gmx_chdir(multidirs[simulationIndex].c_str());
127 catch (gmx::GromacsException& e)
129 e.prependContext("While changing directory for multi-simulation to " + multidirs[simulationIndex]);
132 return std::make_unique<gmx_multisim_t>(numSimulations, simulationIndex, mastersComm, simulationComm);
134 GMX_UNUSED_VALUE(worldComm);
//! Initializes the handle; takes ownership of the two communicators,
//! which are freed by the destructor.
gmx_multisim_t::gmx_multisim_t(int numSimulations, int simulationIndex, MPI_Comm mastersComm, MPI_Comm simulationComm) :
    numSimulations_(numSimulations),
    simulationIndex_(simulationIndex),
    mastersComm_(mastersComm),
    simulationComm_(simulationComm)
//! Frees the owned communicators, skipping MPI_COMM_NULL and
//! MPI_COMM_WORLD, which must never be freed.
gmx_multisim_t::~gmx_multisim_t()
    // TODO This would work better if the result of MPI_Comm_split was
    // put into an RAII-style guard, such as gmx::unique_cptr.
    if (mastersComm_ != MPI_COMM_NULL && mastersComm_ != MPI_COMM_WORLD)
        MPI_Comm_free(&mastersComm_);
    if (simulationComm_ != MPI_COMM_NULL && simulationComm_ != MPI_COMM_WORLD)
        MPI_Comm_free(&simulationComm_);
//! Sums the \c nr doubles in \c r over all ranks of \c mpi_comm,
//! in place when the MPI implementation supports MPI_IN_PLACE;
//! otherwise via a temporary buffer (declaration elided here).
static void gmx_sumd_comm(int nr, double r[], MPI_Comm mpi_comm)
#    if MPI_IN_PLACE_EXISTS
    MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
    /* this function is only used in code that is not performance critical,
       (during setup, when comm_rec is not the appropriate communication
       structure), so this isn't as bad as it looks. */
    MPI_Allreduce(r, buf, nr, MPI_DOUBLE, MPI_SUM, mpi_comm);
    // Copy the reduced values back into the caller's array.
    for (i = 0; i < nr; i++)
//! Sums the \c nr floats in \c r over all ranks of \c mpi_comm,
//! in place when the MPI implementation supports MPI_IN_PLACE;
//! otherwise via a temporary buffer (declaration elided here).
static void gmx_sumf_comm(int nr, float r[], MPI_Comm mpi_comm)
#    if MPI_IN_PLACE_EXISTS
    MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
    /* this function is only used in code that is not performance critical,
       (during setup, when comm_rec is not the appropriate communication
       structure), so this isn't as bad as it looks. */
    MPI_Allreduce(r, buf, nr, MPI_FLOAT, MPI_SUM, mpi_comm);
    // Copy the reduced values back into the caller's array.
    for (i = 0; i < nr; i++)
//! Sums doubles over the master ranks of all simulations.
//! NOTE(review): the assert appears to belong to an elided non-MPI
//! build branch, where calling this is a programming error — confirm.
void gmx_sumd_sim(int gmx_unused nr, double gmx_unused r[], const gmx_multisim_t gmx_unused* ms)
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumd_sim");
    gmx_sumd_comm(nr, r, ms->mastersComm_);
//! Sums floats over the master ranks of all simulations.
//! NOTE(review): the assert appears to belong to an elided non-MPI
//! build branch, where calling this is a programming error — confirm.
void gmx_sumf_sim(int gmx_unused nr, float gmx_unused r[], const gmx_multisim_t gmx_unused* ms)
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumf_sim");
    gmx_sumf_comm(nr, r, ms->mastersComm_);
//! Sums ints over the master ranks of all simulations, in place when
//! MPI_IN_PLACE is available, else via the handle's reusable buffer.
//! NOTE(review): the assert appears to belong to an elided non-MPI
//! build branch, where calling this is a programming error — confirm.
void gmx_sumi_sim(int gmx_unused nr, int gmx_unused r[], const gmx_multisim_t gmx_unused* ms)
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumi_sim");
#    if MPI_IN_PLACE_EXISTS
    MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT, MPI_SUM, ms->mastersComm_);
    /* this is thread-unsafe, but it will do for now: */
    ms->intBuffer.resize(nr);
    MPI_Allreduce(r, ms->intBuffer.data(), ms->intBuffer.size(), MPI_INT, MPI_SUM, ms->mastersComm_);
    // Copy the reduced values back into the caller's array.
    std::copy(std::begin(ms->intBuffer), std::end(ms->intBuffer), r);
//! Sums int64_t values over the master ranks of all simulations, in
//! place when MPI_IN_PLACE is available, else via the handle's buffer.
//! NOTE(review): the assert appears to belong to an elided non-MPI
//! build branch, where calling this is a programming error — confirm.
void gmx_sumli_sim(int gmx_unused nr, int64_t gmx_unused r[], const gmx_multisim_t gmx_unused* ms)
    GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumli_sim");
#    if MPI_IN_PLACE_EXISTS
    MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_INT64_T, MPI_SUM, ms->mastersComm_);
    /* this is thread-unsafe, but it will do for now: */
    ms->int64Buffer.resize(nr);
    MPI_Allreduce(r, ms->int64Buffer.data(), ms->int64Buffer.size(), MPI_INT64_T, MPI_SUM, ms->mastersComm_);
    // Copy the reduced values back into the caller's array.
    std::copy(std::begin(ms->int64Buffer), std::end(ms->int64Buffer), r);
//! Gathers \c localValue from each simulation into a vector indexed
//! by simulation index. In the elided fallback branches (presumably
//! non-multi-sim and non-MPI builds) the result holds only the local
//! value — confirm against the full file.
std::vector<int> gatherIntFromMultiSimulation(const gmx_multisim_t* ms, const int localValue)
    std::vector<int> valuesFromAllRanks;
        // Gather by summing a vector that is zero everywhere except
        // at this simulation's own index.
        valuesFromAllRanks.resize(ms->numSimulations_);
        valuesFromAllRanks[ms->simulationIndex_] = localValue;
        gmx_sumi_sim(ms->numSimulations_, valuesFromAllRanks.data(), ms);
        valuesFromAllRanks.emplace_back(localValue);
        GMX_UNUSED_VALUE(ms);
        valuesFromAllRanks.emplace_back(localValue);
    return valuesFromAllRanks;
/*! \brief Checks that the integer \c val (reported as \c name) is
 * identical across all simulations; calls gmx_fatal() when it is not.
 *
 * Progress and the per-subsystem values are written to \c log unless
 * \c bQuiet is set or \c log is null.
 */
void check_multi_int(FILE* log, const gmx_multisim_t* ms, int val, const char* name, gmx_bool bQuiet)
    gmx_bool bCompatible;
    if (nullptr != log && !bQuiet)
        fprintf(log, "Multi-checking %s ... ", name);
        gmx_fatal(FARGS, "check_multi_int called with a NULL communication pointer");
    // Gather each simulation's value by summing a buffer that is zero
    // everywhere except at this simulation's own index.
    snew(ibuf, ms->numSimulations_);
    ibuf[ms->simulationIndex_] = val;
    gmx_sumi_sim(ms->numSimulations_, ibuf, ms);
    // Compatible only when all adjacent gathered values agree.
    for (p = 1; p < ms->numSimulations_; p++)
        bCompatible = bCompatible && (ibuf[p - 1] == ibuf[p]);
    if (nullptr != log && !bQuiet)
        fprintf(log, "OK\n");
        fprintf(log, "\n%s is not equal for all subsystems\n", name);
        for (p = 0; p < ms->numSimulations_; p++)
            fprintf(log, " subsystem %d: %d\n", p, ibuf[p]);
        gmx_fatal(FARGS, "The %d subsystems are not compatible\n", ms->numSimulations_);
328 void check_multi_int64(FILE* log, const gmx_multisim_t* ms, int64_t val, const char* name, gmx_bool bQuiet)
332 gmx_bool bCompatible;
334 if (nullptr != log && !bQuiet)
336 fprintf(log, "Multi-checking %s ... ", name);
341 gmx_fatal(FARGS, "check_multi_int called with a NULL communication pointer");
344 snew(ibuf, ms->numSimulations_);
345 ibuf[ms->simulationIndex_] = val;
346 gmx_sumli_sim(ms->numSimulations_, ibuf, ms);
349 for (p = 1; p < ms->numSimulations_; p++)
351 bCompatible = bCompatible && (ibuf[p - 1] == ibuf[p]);
356 if (nullptr != log && !bQuiet)
358 fprintf(log, "OK\n");
363 // TODO Part of this error message would also be good to go to
364 // stderr (from one rank of one sim only)
367 fprintf(log, "\n%s is not equal for all subsystems\n", name);
368 for (p = 0; p < ms->numSimulations_; p++)
371 /* first make the format string */
372 snprintf(strbuf, 255, " subsystem %%d: %s\n", "%" PRId64);
373 fprintf(log, strbuf, p, ibuf[p]);
376 gmx_fatal(FARGS, "The %d subsystems are not compatible\n", ms->numSimulations_);
/*! \brief Returns whether this rank is the master rank of its
 * simulation, handling the multi-simulation, thread-MPI and no-MPI
 * cases (some branch guards are elided in this view).
 */
bool findIsSimulationMasterRank(const gmx_multisim_t* ms, MPI_Comm communicator)
    // Ranks of multi-simulations know whether they are a master
    // rank. Ranks of non-multi simulation do not know until a
    // t_commrec is available.
    if ((ms != nullptr) && (ms->numSimulations_ > 1))
        // Only the master rank of each simulation belongs to mastersComm_.
        return ms->mastersComm_ != MPI_COMM_NULL;
        MPI_Comm_rank(communicator, &rank);
    else if (GMX_THREAD_MPI)
        GMX_RELEASE_ASSERT(communicator == MPI_COMM_NULL || communicator == MPI_COMM_WORLD,
                           "Invalid communicator");
        // Spawned threads have MPI_COMM_WORLD upon creation, so if
        // the communicator is MPI_COMM_NULL this is not a spawned thread,
        // ie is the master thread
        return (communicator == MPI_COMM_NULL);
    // No MPI means it must be the master (and only) rank.
418 bool isMasterSim(const gmx_multisim_t* ms)
420 return !isMultiSim(ms) || ms->simulationIndex_ == 0;
423 bool isMasterSimMasterRank(const gmx_multisim_t* ms, const bool isMaster)
425 return (isMaster && isMasterSim(ms));