void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
{
#if GMX_MPI
- MPI_Comm mpi_comm_pp_physicalnode = MPI_COMM_NULL;
+ gmx_domdec_t* dd = cr->dd;
if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
{
return;
}
- const int physicalnode_id_hash = gmx_physicalnode_id_hash();
+ if (cr->nnodes == 1)
+ {
+ dd->comm->nrank_gpu_shared = 1;
- gmx_domdec_t* dd = cr->dd;
+ return;
+ }
+
+ const int physicalnode_id_hash = gmx_physicalnode_id_hash();
if (debug)
{
// TODO PhysicalNodeCommunicator could be extended/used to handle
// the need for per-node per-group communicators.
+ MPI_Comm mpi_comm_pp_physicalnode;
MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank, &mpi_comm_pp_physicalnode);
MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank, &dd->comm->mpi_comm_gpu_shared);
MPI_Comm_free(&mpi_comm_pp_physicalnode);
getThisRankDuties(cr),
dd_index(cartSetup.ntot, ddCellIndex),
&cr->mpi_comm_mygroup);
+ MPI_Comm_size(cr->mpi_comm_mygroup, &cr->sizeOfMyGroupCommunicator);
#else
GMX_UNUSED_VALUE(ddCellIndex);
#endif
#if GMX_MPI
/* Split the sim communicator into PP and PME-only nodes */
MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr), cr->nodeid, &cr->mpi_comm_mygroup);
+ MPI_Comm_size(cr->mpi_comm_mygroup, &cr->sizeOfMyGroupCommunicator);
MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
#endif
}
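The two MPI_Comm_split calls above are the core of the GPU-sharing setup: the first groups the PP ranks by physical node (the node-id hash is the color), and the second sub-splits each node group by GPU id, so each resulting communicator contains exactly the ranks that share one GPU. A minimal standalone sketch of that pattern in plain MPI, with a caller-supplied nodeHash standing in for gmx_physicalnode_id_hash():

#include <mpi.h>

// Sketch: build a communicator of ranks that share a GPU on one node.
// The caller owns (and must eventually MPI_Comm_free) the returned communicator.
MPI_Comm makeGpuSharingComm(MPI_Comm commAll, int nodeHash, int gpuId)
{
    int rank;
    MPI_Comm_rank(commAll, &rank);

    // First split: one communicator per physical node (equal hash -> same color).
    MPI_Comm commNode;
    MPI_Comm_split(commAll, nodeHash, rank, &commNode);

    // Second split: within a node, group the ranks assigned to the same GPU.
    MPI_Comm commGpuShared;
    MPI_Comm_split(commNode, gpuId, rank, &commGpuShared);

    // The intermediate per-node communicator is only needed during setup.
    MPI_Comm_free(&commNode);

    return commGpuShared; // its size is the number of ranks sharing this GPU
}

Using the caller's rank as the key in both splits preserves the original relative rank ordering inside each sub-communicator, which is what the code above does with dd->rank.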
if (thisRankHasDuty(cr, DUTY_PP))
{
- /* Copy or make a new PP communicator */
+ if (dd->nnodes > 1)
+ {
+ /* Copy or make a new PP communicator */
- /* We (possibly) reordered the nodes in split_communicator,
- * so it is no longer required in make_pp_communicator.
- */
- const bool useCartesianReorder = (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
+ /* We (possibly) reordered the nodes in split_communicator,
+ * so it is no longer required in make_pp_communicator.
+ */
+ const bool useCartesianReorder =
+ (ddSettings.useCartesianReorder && !cartSetup.bCartesianPP_PME);
- make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
+ make_pp_communicator(mdlog, dd, cr, useCartesianReorder);
+ }
}
else
{
void gmx_unused* dest)
{
#if GMX_MPI
- /* Some MPI implementations don't specify const */
- MPI_Gather(const_cast<void*>(src), nbytes, MPI_BYTE, dest, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
+ if (dd->nnodes > 1)
+ {
+ /* Some MPI implementations don't specify const */
+ MPI_Gather(const_cast<void*>(src), nbytes, MPI_BYTE, dest, nbytes, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
+ }
+ else
#endif
+ {
+ memcpy(dest, src, nbytes);
+ }
}
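Note the placement of the else before the #endif: in an MPI build the memcpy block is the single-rank branch, while in a non-MPI build it is the entire function body, so one copy of the fallback serves both cases. A self-contained sketch of the same gather-or-copy shape (MPI build assumed; gatherOrCopy is a hypothetical stand-in, not the GROMACS function); dd_gatherv below follows the identical pattern, with memcpy(rbuf, sbuf, rcounts[0]) as its one-rank case:

#include <cstring>
#include <mpi.h>

// Sketch: gather fixed-size contributions to rootRank, degrading to a local
// copy when the communicator effectively has a single rank.
void gatherOrCopy(int nnodes, MPI_Comm comm, int rootRank, const void* src, void* dest, int nbytes)
{
    if (nnodes > 1)
    {
        // Some MPI implementations don't declare the send buffer const.
        MPI_Gather(const_cast<void*>(src), nbytes, MPI_BYTE, dest, nbytes, MPI_BYTE, rootRank, comm);
    }
    else
    {
        // A gather over one rank is just a local copy.
        std::memcpy(dest, src, nbytes);
    }
}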
void dd_gatherv(const gmx_domdec_t gmx_unused* dd,
void gmx_unused* rbuf)
{
#if GMX_MPI
- int dum = 0;
-
- if (scount == 0)
+ if (dd->nnodes > 1)
{
- /* MPI does not allow NULL pointers */
- sbuf = &dum;
+ int dum;
+
+ if (scount == 0)
+ {
+ /* MPI does not allow NULL pointers */
+ sbuf = &dum;
+ }
+ /* Some MPI implementations don't specify const */
+ MPI_Gatherv(
+ const_cast<void*>(sbuf), scount, MPI_BYTE, rbuf, rcounts, disps, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
}
- /* Some MPI implementations don't specify const */
- MPI_Gatherv(
- const_cast<void*>(sbuf), scount, MPI_BYTE, rbuf, rcounts, disps, MPI_BYTE, DDMASTERRANK(dd), dd->mpi_comm_all);
+ else
#endif
+ {
+ memcpy(rbuf, sbuf, rcounts[0]);
+ }
}
/* The communication setup within the "all" communicator
 * is defined in dd->comm in domdec.cpp
 */
- int nnodes = 0;
+ int nnodes = 1;
MPI_Comm mpi_comm_all = MPI_COMM_NULL;
/* The local DD cell index and rank */
gmx::IVec ci = { 0, 0, 0 };
void LocalTopologyChecker::scheduleCheckOfLocalTopology(const int numBondedInteractionsToReduce)
{
- // Fill the reduction buffer with the value from this domain to reduce
- impl_->reductionBuffer_[0] = double(numBondedInteractionsToReduce);
-
- // Pass the post-reduction callback to the ObservablesReducer via
- // the callback it gave us for the purpose.
- //
- // Note that it's possible that the callbackAfterReduction is already
- // outstanding, e.g. if repartitioning was triggered before
- // observables were reduced. This could happen for example when
- // replica exchange took place soon after a partition. If so, the
- // callback will be called again. So long as there is no race
- // between the calls to this function and the calls to
- // ObservablesReducer for reduction, this will work correctly. It
- // could be made safer e.g. with checks against duplicate
- // callbacks, but there is no problem to solve.
- //
- // There is no need to check the return value from this callback,
- // as it is not an error to request reduction at a future step.
- impl_->callbackToRequireReduction_(ReductionRequirement::Eventually);
+ // When we have a single domain, we don't need to reduce, and algorithmically
+ // we cannot miss any interactions, so we can assert here.
+ if (!havePPDomainDecomposition(impl_->cr_))
+ {
+ GMX_RELEASE_ASSERT(numBondedInteractionsToReduce == impl_->expectedNumGlobalBondedInteractions_,
+ "With a single domain the number of assigned bonded interactions should "
+ "always match the global number");
+ }
+ else
+ {
+ // Fill the reduction buffer with the value from this domain to reduce
+ impl_->reductionBuffer_[0] = double(numBondedInteractionsToReduce);
+
+ // Pass the post-reduction callback to the ObservablesReducer via
+ // the callback it gave us for the purpose.
+ //
+ // Note that it's possible that the callbackAfterReduction is already
+ // outstanding, e.g. if repartitioning was triggered before
+ // observables were reduced. This could happen for example when
+ // replica exchange took place soon after a partition. If so, the
+ // callback will be called again. So long as there is no race
+ // between the calls to this function and the calls to
+ // ObservablesReducer for reduction, this will work correctly. It
+ // could be made safer e.g. with checks against duplicate
+ // callbacks, but there is no problem to solve.
+ //
+ // There is no need to check the return value from this callback,
+ // as it is not an error to request reduction at a future step.
+ impl_->callbackToRequireReduction_(ReductionRequirement::Eventually);
+ }
}
} // namespace gmx
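The multi-domain branch above follows a store-then-signal protocol: the module writes its local value into a slot of a shared reduction buffer and then, through a callback the ObservablesReducer handed out earlier, requests that the buffer be reduced at some future convenient step rather than immediately. A self-contained miniature of that protocol, with hypothetical names (TopologyCheckRequester and its members are illustrative, not GROMACS API):

#include <cassert>
#include <functional>
#include <vector>

enum class ReductionRequirement { Soon, Eventually };

struct TopologyCheckRequester
{
    bool singleDomain;
    int expectedGlobalCount;
    std::vector<double> reductionBuffer;                        // slot 0 is owned by this module
    std::function<void(ReductionRequirement)> requireReduction; // provided by the reducer

    void scheduleCheck(int localCount)
    {
        if (singleDomain)
        {
            // One domain holds all interactions, so the local count must match.
            assert(localCount == expectedGlobalCount);
        }
        else
        {
            reductionBuffer[0] = double(localCount);
            // Duplicate requests are harmless: the callback may already be pending,
            // and requesting reduction at a future step is never an error.
            requireReduction(ReductionRequirement::Eventually);
        }
    }
};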
/*! \brief Set that the local topology should be checked via
* observables reduction whenever that reduction is required by
- * another module. */
+* another module. With a single domain, the check is instead
+* performed as a direct assertion.
+ */
void scheduleCheckOfLocalTopology(int numBondedInteractionsToReduce);
private:
// For now, we want things to go horribly wrong if this is used too early...
// TODO: Remove when communicators are removed from commrec (#2395)
- cr->nnodes = -1;
- cr->nodeid = -1;
- cr->sim_nodeid = -1;
- cr->mpi_comm_mysim = MPI_COMM_NULL;
- cr->mpi_comm_mygroup = MPI_COMM_NULL;
+ cr->nnodes = -1;
+ cr->sizeOfMyGroupCommunicator = -1;
+ cr->nodeid = -1;
+ cr->sim_nodeid = -1;
+ cr->mpi_comm_mysim = MPI_COMM_NULL;
+ cr->mpi_comm_mygroup = MPI_COMM_NULL;
// TODO cr->duty should not be initialized here
cr->duty = (DUTY_PP | DUTY_PME);
void gmx_barrier(MPI_Comm gmx_unused communicator)
{
+ if (communicator == MPI_COMM_NULL)
+ {
+ return;
+ }
#if !GMX_MPI
GMX_RELEASE_ASSERT(false, "Invalid call to gmx_barrier");
#else
void gmx_sumd(int gmx_unused nr, double gmx_unused r[], const t_commrec gmx_unused* cr)
{
+ if (cr->sizeOfMyGroupCommunicator == 1)
+ {
+ return;
+ }
+
#if !GMX_MPI
GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumd");
#else
void gmx_sumf(int gmx_unused nr, float gmx_unused r[], const t_commrec gmx_unused* cr)
{
+ if (cr->sizeOfMyGroupCommunicator == 1)
+ {
+ return;
+ }
+
#if !GMX_MPI
GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumf");
#else
void gmx_sumi(int gmx_unused nr, int gmx_unused r[], const t_commrec gmx_unused* cr)
{
+ if (cr->sizeOfMyGroupCommunicator == 1)
+ {
+ return;
+ }
+
#if !GMX_MPI
GMX_RELEASE_ASSERT(false, "Invalid call to gmx_sumi");
#else
}
}
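Each of these sums now returns before touching MPI when the cached group-communicator size is 1, so the guard costs one integer compare per call; the size is stored once at communicator-split time by the MPI_Comm_size additions above. A minimal sketch of a guarded in-place sum in plain MPI (sumDoublesInPlace is illustrative; the in-place allreduce is one standard way to implement such a sum over cr->mpi_comm_mygroup):

#include <mpi.h>

// Sketch: sum nr doubles in place across a communicator, skipping the
// collective entirely when the communicator has a single rank.
void sumDoublesInPlace(int nr, double r[], MPI_Comm comm, int commSize)
{
    if (commSize == 1)
    {
        return; // the local values already are the global sum
    }
    MPI_Allreduce(MPI_IN_PLACE, r, nr, MPI_DOUBLE, MPI_SUM, comm);
}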
- if (!PAR(cr))
+ if (!DOMAINDECOMP(cr))
{
nat_loc = nat;
ind_loc = ind;
}
/* Communicate initial coordinates xa_old to all processes */
- if (PAR(cr))
+ if (cr && havePPDomainDecomposition(cr))
{
gmx_bcast(nat * sizeof(xa_old[0]), xa_old, cr->mpi_comm_mygroup);
}
/* Communicate the coordinates required for the non-local constraints
* for LINCS and/or SETTLE.
*/
- if (cr->dd)
+ if (havePPDomainDecomposition(cr))
{
dd_move_x_constraints(cr->dd,
box,
// With both nonbonded and PME offloaded a GPU on the same rank, we use
// an alternating wait/reduction scheme.
bool alternateGpuWait =
- (!c_disableAlternatingWait && stepWork.haveGpuPmeOnThisRank
- && simulationWork.useGpuNonbonded && !DOMAINDECOMP(cr) && !stepWork.useGpuFBufferOps);
+ (!c_disableAlternatingWait && stepWork.haveGpuPmeOnThisRank && simulationWork.useGpuNonbonded
+ && !simulationWork.havePpDomainDecomposition && !stepWork.useGpuFBufferOps);
+
if (alternateGpuWait)
{
alternatePmeNbGpuWaitReduce(fr->nbv.get(),
"be available in a different form in a future version of GROMACS, "
"e.g. gmx minimize and an .mdp option.");
+ if (DOMAINDECOMP(cr))
+ {
+ gmx_fatal(FARGS, "L_BFGS is currently not supported");
+ }
if (PAR(cr))
{
gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank");
"Linear acceleration has been removed in GROMACS 2022, and was broken for many years "
"before that. Use GROMACS 4.5 or earlier if you need this feature.");
+ // Now we decide whether to use the domain decomposition machinery.
+ // Note that this does not necessarily imply actually using multiple domains.
// Now the number of ranks is known to all ranks, and each knows
// the inputrec read by the master rank. The ranks can now all run
// the task-deciding functions and will agree on the result
// without needing to communicate.
+ // The L-BFGS minimizer, test-particle insertion, normal modes and shell dynamics don't support DD
const bool useDomainDecomposition =
- (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == IntegrationAlgorithm::NM));
+ !(inputrec->eI == IntegrationAlgorithm::LBFGS || EI_TPI(inputrec->eI)
+ || inputrec->eI == IntegrationAlgorithm::NM
+ || gmx_mtop_particletype_count(mtop)[ParticleType::Shell] > 0);
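The key change is that the decision no longer depends on PAR(cr): the DD machinery is now used even on a single rank, and only algorithms that cannot run with it opt out. A self-contained sketch of that predicate, with a mini enum standing in for GROMACS' IntegrationAlgorithm, EI_TPI() and gmx_mtop_particletype_count():

// Sketch: which algorithms veto the domain decomposition machinery.
// The enum values are stand-ins for the GROMACS types named above.
enum class Integrator { MD, LBFGS, NM, TPI, TPIC };

bool useDomainDecompositionMachinery(Integrator ei, int numShellParticles)
{
    const bool isTpi = (ei == Integrator::TPI || ei == Integrator::TPIC);
    // L-BFGS, test-particle insertion, normal modes and shell dynamics
    // do not (yet) support the DD machinery.
    return !(ei == Integrator::LBFGS || isTpi || ei == Integrator::NM || numShellParticles > 0);
}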
// Note that these variables describe only their own node.
//
if (deviceInfo != nullptr)
{
- if (DOMAINDECOMP(cr) && thisRankHasDuty(cr, DUTY_PP))
+ if (runScheduleWork.simulationWork.havePpDomainDecomposition && thisRankHasDuty(cr, DUTY_PP))
{
dd_setup_dlb_resource_sharing(cr, deviceId);
}
"not supported in combination with shell particles.\nPlease make a new tpr file.",
nstcalcenergy);
}
- if (usingDomainDecomposition)
+ if (nshell > 0 && usingDomainDecomposition)
{
gmx_fatal(
FARGS,
* All communication within some simulation should happen
* in mpi_comm_mysim, or its subset mpi_comm_mygroup.
*/
- int sim_nodeid, nnodes, npmenodes;
-
- /* thread numbers: */
- /* Not used yet: int threadid, nthreads; */
- /* The nodeid in the PP/PME, PP or PME group */
+ //! The rank-id in mpi_comm_mysim
+ int sim_nodeid;
+ //! The number of ranks in mpi_comm_mysim
+ int nnodes;
+ //! The number of separate PME ranks, 0 when no separate PME ranks are used
+ int npmenodes;
+
+ //! The rank-id in mpi_comm_mygroup
int nodeid;
/* MPI communicators within a single simulation */
MPI_Comm mpi_comm_mysim; /* communicator including all ranks of
                            a single simulation */
MPI_Comm mpi_comm_mygroup; /* subset of mpi_comm_mysim including only
the ranks in the same group (PP or PME) */
+ //! The number of ranks in mpi_comm_mygroup
+ int sizeOfMyGroupCommunicator;
+
//! The communicator used before DD was initialized
MPI_Comm mpiDefaultCommunicator;
int sizeOfDefaultCommunicator;
//! The node id for the master
#define MASTERRANK(cr) (0)
-/*! \brief Do we decompose the work of this simulation?
- *
- * True if this simulation uses more than one PP rank, or if this simulation
- * uses at least one PME-only rank.
+/*! \brief Returns whether the domain decomposition machinery is active
*
- * PAR(cr) is true if this is true, but the converse does not apply (see docs
- * of PAR(cr)).
- *
- * This is true if havePPDomainDecomposition is true, but the converse does not
- * apply (see docs of havePpDomainDecomposition()).
- *
- * \todo As part of Issue #2395, replace calls to this with
- * havePPDomainDecomposition or a call of some other/new function, as
- * appropriate to each case. Then eliminate this macro. */
-#define DOMAINDECOMP(cr) (((cr)->dd != nullptr) && PAR(cr))
+ * Note that when the return value is true, there are not necessarily
+ * multiple domains. With most integrators, the domain decomposition
+ * machinery is active, and reorders the atoms, even with a single MPI rank
+ * or with 1 PP and 1 PME rank. Only a few special non-integrator "integrators"
+ * do not (yet) support the domain decomposition machinery and therefore
+ * this macro is still needed.
+ */
+#define DOMAINDECOMP(cr) ((cr)->dd != nullptr)
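After this change DOMAINDECOMP(cr) answers "is the DD machinery active?", while havePPDomainDecomposition (documented next) answers "are there actually multiple PP domains?". A sketch of the distinction; the dd->nnodes > 1 form of the multi-domain test is inferred from the guards added elsewhere in this change, not copied from the GROMACS helper:

// Sketch only: the machinery can be active (dd set up, atoms reordered)
// without there being more than one PP domain.
static bool ddMachineryIsActive(const t_commrec* cr)
{
    return cr->dd != nullptr; // true even on a single rank for most integrators
}

static bool haveMultiplePpDomains(const t_commrec* cr)
{
    return cr->dd != nullptr && cr->dd->nnodes > 1; // true only with a real decomposition
}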
/*! \brief Returns whether we have actual domain decomposition for the particle-particle interactions
*
const gmx_domdec_zones_t* ddZones) :
pbcType(pbcType),
doTestParticleInsertion(doTestParticleInsertion),
- haveMultipleDomains(numDDCells != nullptr),
+ haveMultipleDomains(numDDCells != nullptr
+ && (*numDDCells)[XX] * (*numDDCells)[YY] * (*numDDCells)[ZZ] > 1),
zones(ddZones)
{
for (int d = 0; d < DIM; d++)
nthreads_omp_mpi_ok_min = nthreads_omp_mpi_ok_min_gpu;
}
- if (DOMAINDECOMP(cr))
+ if (cr && cr->nnodes > 1)
{
if (nth_omp_max < nthreads_omp_mpi_ok_min || nth_omp_max > nthreads_omp_mpi_ok_max)
{