#include "biassharing.h"
+#include "config.h"
+
+#include <algorithm>
+#include <set>
+#include <type_traits>
#include <vector>
#include "gromacs/gmxlib/network.h"
#include "gromacs/mdrunutility/multisim.h"
#include "gromacs/mdtypes/awh_params.h"
#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/stringutil.h"
namespace gmx
{
-bool haveBiasSharingWithinSimulation(const AwhParams& awhParams)
+namespace
{
- bool haveSharing = false;
- for (int k = 0; k < awhParams.numBias; k++)
+//! Gathers the positive share indices of the biases of all simulations into a
+//! multiset, where the count of an index tells how many simulations share that bias
+std::multiset<int> getGlobalShareIndices(ArrayRef<const int> localShareIndices, MPI_Comm simulationMastersComm)
+{
+#if GMX_MPI
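+    // First make the number of biases of each simulation available to all master ranks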
+ int numSimulations;
+ MPI_Comm_size(simulationMastersComm, &numSimulations);
+ int ourRank;
+ MPI_Comm_rank(simulationMastersComm, &ourRank);
+ std::vector<int> biasCountsIn(numSimulations, 0);
+ std::vector<int> biasCounts(numSimulations, 0);
+ biasCountsIn[ourRank] = localShareIndices.size();
+ MPI_Allreduce(biasCountsIn.data(), biasCounts.data(), numSimulations, MPI_INT, MPI_SUM, simulationMastersComm);
+ // Now we need to gather the share indices to all (master) ranks.
+    // We could use MPI_Allgatherv, but thread-MPI does not support it and
+    // MPI_Allreduce over a zero-padded buffer gives simpler code, so we use that.
+ int totNumBiases = 0;
+ int ourOffset = 0;
+ for (int rank = 0; rank < numSimulations; rank++)
{
- int shareGroup = awhParams.awhBiasParams[k].shareGroup;
- if (shareGroup > 0)
+ if (rank == ourRank)
+ {
+ ourOffset = totNumBiases;
+ }
+ totNumBiases += biasCounts[rank];
+ }
+    // Fill a buffer with zeros, then insert our share indices at our offset
+ std::vector<int> shareIndicesAllIn(totNumBiases, 0);
+ std::copy(localShareIndices.begin(), localShareIndices.end(), shareIndicesAllIn.begin() + ourOffset);
+ // Gather all sharing indices to all (master) ranks
+ std::vector<int> shareIndicesAll(totNumBiases);
+ MPI_Allreduce(shareIndicesAllIn.data(), shareIndicesAll.data(), totNumBiases, MPI_INT, MPI_SUM, simulationMastersComm);
+#else
+ GMX_UNUSED_VALUE(simulationMastersComm);
+
+ ArrayRef<const int> shareIndicesAll = localShareIndices;
+#endif // GMX_MPI
+
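+    // Keep only the positive share indices; a share group of 0 means not shared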
+ std::multiset<int> shareIndicesSet;
+ for (int shareIndex : shareIndicesAll)
+ {
+ if (shareIndex > 0)
+ {
+ shareIndicesSet.insert(shareIndex);
+ }
+ }
+
+ return shareIndicesSet;
+}
+
+} // namespace
+
+BiasSharing::BiasSharing(const AwhParams& awhParams, const t_commrec& commRecord, MPI_Comm simulationMastersComm) :
+ commRecord_(commRecord)
+{
+ if (MASTER(&commRecord))
+ {
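+        // Collect the share group of each bias; shared biases are required
+        // to have strictly increasing, positive share-group values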
+ std::vector<int> localShareIndices;
+ int shareGroupPrev = 0;
+ for (int k = 0; k < awhParams.numBias(); k++)
{
- for (int i = k + 1; i < awhParams.numBias; i++)
+ const int shareGroup = awhParams.awhBiasParams()[k].shareGroup();
+ GMX_RELEASE_ASSERT(shareGroup >= 0, "Bias share group values should be >= 0");
+ localShareIndices.push_back(shareGroup);
+ if (shareGroup > 0)
{
- if (awhParams.awhBiasParams[i].shareGroup == shareGroup)
+ if (shareGroup <= shareGroupPrev)
{
- haveSharing = true;
+ GMX_THROW(
+ InvalidInputError("AWH biases that are shared should use increasing "
+ "share-group values"));
}
+ shareGroupPrev = shareGroup;
}
}
- }
+ std::multiset<int> globalShareIndices =
+ getGlobalShareIndices(localShareIndices, simulationMastersComm);
- return haveSharing;
-}
+ int numSimulations = 1;
+#if GMX_MPI
+ MPI_Comm_size(simulationMastersComm, &numSimulations);
+ int myRank;
+ MPI_Comm_rank(simulationMastersComm, &myRank);
+#endif // GMX_MPI
-void biasesAreCompatibleForSharingBetweenSimulations(const AwhParams& awhParams,
- ArrayRef<const size_t> pointSize,
- const gmx_multisim_t* multiSimComm)
-{
- const int numSim = multiSimComm->numSimulations_;
+ numSharingSimulations_.resize(awhParams.numBias(), 1);
+ sharingSimulationIndices_.resize(awhParams.numBias(), 0);
+ multiSimCommPerBias_.resize(awhParams.numBias(), MPI_COMM_NULL);
- /* We currently enforce subsequent shared biases to have consecutive
- * share-group values starting at 1. This means we can reduce shared
- * biases in order over the ranks and it does not restrict possibilities.
- */
- int numShare = 0;
- for (int b = 0; b < awhParams.numBias; b++)
- {
- int group = awhParams.awhBiasParams[b].shareGroup;
- if (group > 0)
+ for (int shareIndex : globalShareIndices)
{
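+        // A share index that occurs more than once means the bias is shared
+        // between simulations, so we need a communicator for it. When all
+        // simulations share the bias we can reuse the communicator over all
+        // masters, otherwise we split off a communicator below.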
- numShare++;
- if (group != numShare)
+ if (globalShareIndices.count(shareIndex) > 1)
{
- GMX_THROW(
- InvalidInputError("AWH biases that are shared should use consequetive "
- "share-group values starting at 1"));
+ const auto& findBiasIndex =
+ std::find(localShareIndices.begin(), localShareIndices.end(), shareIndex);
+ const index localBiasIndex = (findBiasIndex == localShareIndices.end()
+ ? -1
+ : findBiasIndex - localShareIndices.begin());
+ MPI_Comm splitComm;
+ if (static_cast<int>(globalShareIndices.count(shareIndex)) == numSimulations)
+ {
+ splitComm = simulationMastersComm;
+ }
+ else
+ {
+#if GMX_MPI
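+                // Only a subset of the simulations share this bias, so we
+                // split the master communicator by whether the bias is local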
+ const int haveLocally = (localBiasIndex >= 0 ? 1 : 0);
+ MPI_Comm_split(simulationMastersComm, haveLocally, myRank, &splitComm);
+ createdCommList_.push_back(splitComm);
+#else
+                GMX_RELEASE_ASSERT(false, "Cannot have sharing without MPI");
+#endif // GMX_MPI
+ }
+ if (localBiasIndex >= 0)
+ {
+ numSharingSimulations_[localBiasIndex] = globalShareIndices.count(shareIndex);
+#if GMX_MPI
+ MPI_Comm_rank(splitComm, &sharingSimulationIndices_[localBiasIndex]);
+#endif // GMX_MPI
+ multiSimCommPerBias_[localBiasIndex] = splitComm;
+ }
}
}
}
- std::vector<int> numShareAll(numSim);
- numShareAll[multiSimComm->simulationIndex_] = numShare;
- gmx_sumi_sim(numShareAll.size(), numShareAll.data(), multiSimComm);
- for (int sim = 1; sim < numSim; sim++)
+
+#if GMX_MPI
+ if (commRecord.nnodes > 1)
{
- if (numShareAll[sim] != numShareAll[0])
- {
- GMX_THROW(InvalidInputError(
- "Different simulations attempt to share different number of biases"));
- }
+ numSharingSimulations_.resize(awhParams.numBias());
+ MPI_Bcast(
+ numSharingSimulations_.data(), numSharingSimulations_.size(), MPI_INT, 0, commRecord.mpi_comm_mygroup);
}
+#endif // GMX_MPI
+}
- std::vector<int> intervals(numSim * 2);
- intervals[numSim * 0 + multiSimComm->simulationIndex_] = awhParams.nstSampleCoord;
- intervals[numSim * 1 + multiSimComm->simulationIndex_] = awhParams.numSamplesUpdateFreeEnergy;
- gmx_sumi_sim(intervals.size(), intervals.data(), multiSimComm);
- for (int sim = 1; sim < numSim; sim++)
+BiasSharing::~BiasSharing()
+{
+#if GMX_MPI
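+    // Free the communicators created with MPI_Comm_split() in the constructor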
+ for (MPI_Comm comm : createdCommList_)
{
- if (intervals[sim] != intervals[0])
- {
- GMX_THROW(
- InvalidInputError("All simulations should have the same AWH sample interval"));
- }
- if (intervals[numSim + sim] != intervals[numSim])
+ MPI_Comm_free(&comm);
+ }
+#endif // GMX_MPI
+}
+
+namespace
+{
+
+#if GMX_MPI
+
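+//! Returns the MPI data type corresponding to the template parameter type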
+template<typename T>
+std::enable_if_t<std::is_same_v<T, int>, MPI_Datatype> mpiType()
+{
+ return MPI_INT;
+}
+
+template<typename T>
+std::enable_if_t<std::is_same_v<T, long>, MPI_Datatype> mpiType()
+{
+ return MPI_LONG;
+}
+
+template<typename T>
+std::enable_if_t<std::is_same_v<T, double>, MPI_Datatype> mpiType()
+{
+ return MPI_DOUBLE;
+}
+
+#endif // GMX_MPI
+
+} // namespace
+
+/*! \brief
+ * Sums an array over the master ranks of all sharing simulations and, optionally,
+ * broadcasts the result to all ranks within each simulation.
+ *
+ * This assumes the data is identical on all ranks within each simulation.
+ *
+ * \tparam T The type of the data elements.
+ * \param[in,out] data The data to sum.
+ * \param[in] multiSimComm Communicator for the master ranks of sharing simulations.
+ * \param[in] broadcastWithinSimulation Whether to broadcast the result to all ranks within each simulation.
+ * \param[in] commRecord Struct for intra-simulation communication.
+ */
+template<typename T>
+void sumOverSimulations(ArrayRef<T> data,
+ MPI_Comm multiSimComm,
+ const bool broadcastWithinSimulation,
+ const t_commrec& commRecord)
+{
+#if GMX_MPI
+ if (MASTER(&commRecord))
+ {
+ MPI_Allreduce(MPI_IN_PLACE, data.data(), data.size(), mpiType<T>(), MPI_SUM, multiSimComm);
+ }
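+    // When requested, make the summed data available on all ranks of each simulation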
+ if (broadcastWithinSimulation && commRecord.nnodes > 1)
+ {
+ gmx_bcast(data.size() * sizeof(T), data.data(), commRecord.mpi_comm_mygroup);
+ }
+#else
+ GMX_UNUSED_VALUE(data);
+ GMX_UNUSED_VALUE(commRecord);
+ GMX_UNUSED_VALUE(broadcastWithinSimulation);
+ GMX_UNUSED_VALUE(multiSimComm);
+#endif // GMX_MPI
+}
+
+void BiasSharing::sumOverSharingMasterRanks(ArrayRef<int> data, const int biasIndex) const
+{
+ sumOverSimulations(data, multiSimCommPerBias_[biasIndex], false, commRecord_);
+}
+
+void BiasSharing::sumOverSharingMasterRanks(ArrayRef<long> data, const int biasIndex) const
+{
+ sumOverSimulations(data, multiSimCommPerBias_[biasIndex], false, commRecord_);
+}
+
+void BiasSharing::sumOverSharingSimulations(ArrayRef<int> data, const int biasIndex) const
+{
+ sumOverSimulations(data, multiSimCommPerBias_[biasIndex], true, commRecord_);
+}
+
+void BiasSharing::sumOverSharingSimulations(ArrayRef<double> data, const int biasIndex) const
+{
+ sumOverSimulations(data, multiSimCommPerBias_[biasIndex], true, commRecord_);
+}
+
+bool haveBiasSharingWithinSimulation(const AwhParams& awhParams)
+{
+ bool haveSharing = false;
+
+ for (int k = 0; k < awhParams.numBias(); k++)
+ {
+ int shareGroup = awhParams.awhBiasParams()[k].shareGroup();
+ if (shareGroup > 0)
{
- GMX_THROW(InvalidInputError(
- "All simulations should have the same AWH free-energy update interval"));
+ for (int i = k + 1; i < awhParams.numBias(); i++)
+ {
+ if (awhParams.awhBiasParams()[i].shareGroup() == shareGroup)
+ {
+ haveSharing = true;
+ }
+ }
}
}
+ return haveSharing;
+}
+
+void biasesAreCompatibleForSharingBetweenSimulations(const AwhParams& awhParams,
+ ArrayRef<const size_t> pointSize,
+ const BiasSharing& biasSharing)
+{
/* Check the point sizes. This is a sufficient condition for running
* as shared multi-sim run. No physics checks are performed here.
*/
- for (int b = 0; b < awhParams.numBias; b++)
+ const auto& awhBiasParams = awhParams.awhBiasParams();
+ for (int b = 0; b < gmx::ssize(awhBiasParams); b++)
{
- if (awhParams.awhBiasParams[b].shareGroup > 0)
+ if (awhBiasParams[b].shareGroup() > 0)
{
- std::vector<int64_t> pointSizes(numSim);
- pointSizes[multiSimComm->simulationIndex_] = pointSize[b];
- gmx_sumli_sim(pointSizes.size(), pointSizes.data(), multiSimComm);
+ const int numSim = biasSharing.numSharingSimulations(b);
+ if (numSim == 1)
+ {
+ // This bias is not actually shared
+ continue;
+ }
+ const int simIndex = biasSharing.sharingSimulationIndex(b);
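+            // Check that all sharing simulations use the same sampling and
+            // free-energy update intervals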
+ std::vector<int> intervals(numSim * 2);
+ intervals[numSim * 0 + simIndex] = awhParams.nstSampleCoord();
+ intervals[numSim * 1 + simIndex] = awhParams.numSamplesUpdateFreeEnergy();
+ biasSharing.sumOverSharingMasterRanks(intervals, b);
+ for (int sim = 1; sim < numSim; sim++)
+ {
+ if (intervals[sim] != intervals[0])
+ {
+ GMX_THROW(InvalidInputError(
+ "All simulations should have the same AWH sample interval"));
+ }
+ if (intervals[numSim + sim] != intervals[numSim])
+ {
+ GMX_THROW(
+ InvalidInputError("All simulations should have the same AWH "
+ "free-energy update interval"));
+ }
+ }
+
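+            // Check that this bias has the same number of grid points in
+            // all sharing simulations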
+ std::vector<long> pointSizes(numSim);
+ pointSizes[simIndex] = pointSize[b];
+ biasSharing.sumOverSharingMasterRanks(pointSizes, b);
for (int sim = 1; sim < numSim; sim++)
{
if (pointSizes[sim] != pointSizes[0])