Fixes where mdrun could behave incorrectly
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Fix fatal error with mdrun -multidir with more than 1 rank per simulation
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+:issue:`3297`
+
Fixes for ``gmx`` tools
^^^^^^^^^^^^^^^^^^^^^^^
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
va_end(ap);
}
-
-void simulationBarrier(const t_commrec* cr)
-{
- if (PAR(cr))
- {
-#if GMX_MPI
- MPI_Barrier(cr->mpi_comm_mysim);
-#endif
- }
-}
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* for all processes.
*/
-//! Make a barrier across all ranks of this simulation
-void simulationBarrier(const t_commrec* cr);
-
#endif
// Produce the task assignment for this rank.
GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
GpuTaskAssignments gpuTaskAssignments = gpuTaskAssignmentsBuilder.build(
- gpuIdsToUse, userGpuTaskAssignment, *hwinfo, cr, ms, physicalNodeComm, nonbondedTarget,
- pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded, useGpuForPme,
- thisRankHasDuty(cr, DUTY_PP),
+ gpuIdsToUse, userGpuTaskAssignment, *hwinfo, communicator, physicalNodeComm,
+ nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
+ useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
// TODO cr->duty & DUTY_PME should imply that a PME
// algorithm is active, but currently does not.
EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
return (isMaster && isMasterSim(ms));
}
-
-void multiSimBarrier(const gmx_multisim_t* ms)
-{
- if (isMultiSim(ms))
- {
-#if GMX_MPI
- if (ms->mpi_comm_masters != MPI_COMM_NULL)
- {
- MPI_Barrier(ms->mpi_comm_masters);
- }
-#endif
- }
-}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* This rank prints the remaining run time etc. */
bool isMasterSimMasterRank(const gmx_multisim_t* ms, bool isMaster);
-//! Make a barrier across all multi-simulation master ranks
-void multiSimBarrier(const gmx_multisim_t* ms);
-
#endif
#include "taskassignment.h"
+#include "config.h"
+
#include <algorithm>
#include <exception>
#include <string>
/*! \brief Return on each rank the total count over all ranks of all
* simulations. */
-int countOverAllRanks(const t_commrec* cr, const gmx_multisim_t* ms, const int countOnThisRank)
+int countOverAllRanks(MPI_Comm comm, int countOnThisRank)
{
- int countOverAllRanksValue = countOnThisRank;
- if (PAR(cr))
+ int sum;
+#if GMX_MPI
+ int numRanks;
+ MPI_Comm_size(comm, &numRanks);
+ if (numRanks > 1)
{
- // Count over the ranks of this simulation.
- gmx_sumi(1, &countOverAllRanksValue, cr);
+ MPI_Allreduce(&countOnThisRank, &sum, 1, MPI_INT, MPI_SUM, comm);
}
- if (isMultiSim(ms))
+ else
+#endif
{
- // Count over the ranks of all simulations.
- gmx_sumi_sim(1, &countOverAllRanksValue, ms);
- if (PAR(cr))
- {
- // Propagate the information from other simulations back
- // to non-master ranks so they can all agree on future
- // behavior.
- gmx_bcast(sizeof(decltype(countOverAllRanksValue)), &countOverAllRanksValue, cr);
- }
+ sum = countOnThisRank;
+ }
+
+ return sum;
+}
+
/*! \brief Barrier over all ranks in \p comm */
+void barrierOverAllRanks(MPI_Comm comm)
+{
+#if GMX_MPI
+ int numRanks;
+ MPI_Comm_size(comm, &numRanks);
+ if (numRanks > 1)
+ {
+ MPI_Barrier(comm);
}
- return countOverAllRanksValue;
+#else
+ GMX_UNUSED_VALUE(comm);
+#endif
}
} // namespace
GpuTaskAssignments GpuTaskAssignmentsBuilder::build(const std::vector<int>& gpuIdsToUse,
const std::vector<int>& userGpuTaskAssignment,
const gmx_hw_info_t& hardwareInfo,
- const t_commrec* cr,
- const gmx_multisim_t* ms,
+ MPI_Comm gromacsWorldComm,
const PhysicalNodeCommunicator& physicalNodeComm,
const TaskTarget nonbondedTarget,
const TaskTarget pmeTarget,
{
exceptionPtr = std::current_exception();
}
- int countOfExceptionsOnThisRank = int(bool(exceptionPtr));
- int countOfExceptionsOverAllRanks = countOverAllRanks(cr, ms, countOfExceptionsOnThisRank);
+ int countOfExceptionsOnThisRank = int(bool(exceptionPtr));
+ int countOfExceptionsOverAllRanks = countOverAllRanks(gromacsWorldComm, countOfExceptionsOnThisRank);
// Avoid all ranks spamming the error stream
//
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
- // TODO This implements a global barrier so that MPI runtimes can
+ // TODO Global barrier so that MPI runtimes can
// organize an orderly shutdown if one of the ranks has had to
// issue a fatal error above. When we have MPI-aware error
// handling and reporting, this should be improved (perhaps
// centralized there).
- simulationBarrier(cr);
- multiSimBarrier(ms);
- simulationBarrier(cr);
+ barrierOverAllRanks(gromacsWorldComm);
if (countOfExceptionsOverAllRanks > 0)
{
gmx_fatal(FARGS,
#include <vector>
#include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/gmxmpi.h"
struct gmx_device_info_t;
struct gmx_hw_info_t;
-struct gmx_multisim_t;
struct t_commrec;
enum class PmeRunMode;
* \param[in] gpuIdsToUse The compatible GPUs that the user permitted us to use.
* \param[in] userGpuTaskAssignment The user-specified assignment of GPU tasks to device IDs.
* \param[in] hardwareInfo The detected hardware
- * \param[in] cr Communication object.
- * \param[in] ms Multi-simulation handler.
+ * \param[in] gromacsWorldComm MPI communicator for all ranks in the current GROMACS run
* \param[in] physicalNodeComm Communication object for this physical node.
* \param[in] nonbondedTarget The user's choice for mdrun -nb for where to assign
* short-ranged nonbonded interaction tasks.
GpuTaskAssignments build(const std::vector<int>& gpuIdsToUse,
const std::vector<int>& userGpuTaskAssignment,
const gmx_hw_info_t& hardwareInfo,
- const t_commrec* cr,
- const gmx_multisim_t* ms,
+ MPI_Comm gromacsWorldComm,
const PhysicalNodeCommunicator& physicalNodeComm,
TaskTarget nonbondedTarget,
TaskTarget pmeTarget,