ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
- ddSettings.request1D = bool(dd_getenv(mdlog, "GMX_DD_1D", 0));
ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
// Constructor: the only member given an explicit initializer here is
// unitCellInfo, which is constructed from the input record `ir`; the
// constructor body itself is empty.
gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
-/*! \brief Return whether the simulation described can run a 1D DD.
- *
- * The GPU halo exchange code requires 1D DD. Such a DD
- * generally requires a larger box than other possible decompositions
- * with the same rank count, so the calling code might need to decide
- * what is the most appropriate way to run the simulation based on
- * whether such a DD is possible.
- *
- * This function works like init_domain_decomposition(), but will not
- * give a fatal error, and only uses \c cr for communicating between
- * ranks.
- *
- * It is safe to call before thread-MPI spawns ranks, so that
- * thread-MPI can decide whether and how to trigger the GPU halo
- * exchange code path. The number of PME ranks, if any, should be set
- * in \c options.numPmeRanks.
- */
-static bool canMake1DDomainDecomposition(const DDSettings& ddSettingsOriginal,
- DDRole ddRole,
- MPI_Comm communicator,
- const int numRanksRequested,
- const DomdecOptions& options,
- const gmx_mtop_t& mtop,
- const t_inputrec& ir,
- const matrix box,
- gmx::ArrayRef<const gmx::RVec> xGlobal)
-{
- // Ensure we don't write any output from this checking routine
- gmx::MDLogger dummyLogger;
-
- DDSystemInfo systemInfo =
- getSystemInfo(dummyLogger, ddRole, communicator, options, mtop, ir, box, xGlobal);
-
- DDSettings ddSettings = ddSettingsOriginal;
- ddSettings.request1D = true;
- const real gridSetupCellsizeLimit =
- getDDGridSetupCellSizeLimit(dummyLogger, !isDlbDisabled(ddSettings.initialDlbState),
- options.dlbScaling, ir, systemInfo.cellsizeLimit);
- gmx_ddbox_t ddbox = { 0 };
- DDGridSetup ddGridSetup =
- getDDGridSetup(dummyLogger, ddRole, communicator, numRanksRequested, options, ddSettings,
- systemInfo, gridSetupCellsizeLimit, mtop, ir, box, xGlobal, &ddbox);
-
- const bool canMake1DDD = (ddGridSetup.numDomains[XX] != 0);
-
- return canMake1DDD;
-}
-
-bool is1D(const gmx_domdec_t& dd)
-{
- const int maxDimensionSize = std::max(dd.numCells[XX], std::max(dd.numCells[YY], dd.numCells[ZZ]));
- const int productOfDimensionSizes = dd.numCells[XX] * dd.numCells[YY] * dd.numCells[ZZ];
- const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
-
- return decompositionHasOneDimension;
-}
-
namespace gmx
{
t_commrec* cr,
const DomdecOptions& options,
const MdrunOptions& mdrunOptions,
- bool prefer1D,
const gmx_mtop_t& mtop,
const t_inputrec& ir,
const matrix box,
t_commrec* cr,
const DomdecOptions& options,
const MdrunOptions& mdrunOptions,
- const bool prefer1D,
const gmx_mtop_t& mtop,
const t_inputrec& ir,
const matrix box,
ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
- if (prefer1D
- && canMake1DDomainDecomposition(ddSettings_, MASTER(cr_) ? DDRole::Master : DDRole::Agent,
- cr->mpiDefaultCommunicator, cr_->sizeOfDefaultCommunicator,
- options_, mtop_, ir_, box, xGlobal))
- {
- ddSettings_.request1D = true;
- }
-
if (ddSettings_.eFlop > 1)
{
/* Ensure that we have different random flop counts on different ranks */
t_commrec* cr,
const DomdecOptions& options,
const MdrunOptions& mdrunOptions,
- const bool prefer1D,
const gmx_mtop_t& mtop,
const t_inputrec& ir,
const matrix box,
ArrayRef<const RVec> xGlobal) :
- impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1D, mtop, ir, box, xGlobal))
+ impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, xGlobal))
{
}
GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
"Non-local non-bonded stream should be valid when using "
"GPU halo exchange.");
- int gpuHaloExchangeSize = 0;
- int pulseStart = 0;
- if (cr.dd->gpuHaloExchange.empty())
+
+ if (cr.dd->gpuHaloExchange[0].empty())
{
GMX_LOG(mdlog.warning)
.asParagraph()
"by the "
"GMX_GPU_DD_COMMS environment variable.");
}
- else
- {
- gpuHaloExchangeSize = static_cast<int>(cr.dd->gpuHaloExchange.size());
- pulseStart = gpuHaloExchangeSize - 1;
- }
- if (cr.dd->comm->cd[0].numPulses() > gpuHaloExchangeSize)
+
+ for (int d = 0; d < cr.dd->ndim; d++)
{
- for (int pulse = pulseStart; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
+ for (int pulse = cr.dd->gpuHaloExchange[d].size(); pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
{
- cr.dd->gpuHaloExchange.push_back(std::make_unique<gmx::GpuHaloExchange>(
- cr.dd, cr.mpi_comm_mysim, deviceStreamManager.context(),
+ cr.dd->gpuHaloExchange[d].push_back(std::make_unique<gmx::GpuHaloExchange>(
+ cr.dd, d, cr.mpi_comm_mysim, deviceStreamManager.context(),
deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal), pulse, wcycle));
}
const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
const DeviceBuffer<gmx::RVec> d_forcesBuffer)
{
- for (int pulse = 0; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
+ for (int d = 0; d < cr.dd->ndim; d++)
{
- cr.dd->gpuHaloExchange[pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
+ for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
+ {
+ cr.dd->gpuHaloExchange[d][pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
+ }
}
}
const matrix box,
GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
{
- for (int pulse = 0; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
+ for (int d = 0; d < cr.dd->ndim; d++)
{
- cr.dd->gpuHaloExchange[pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
+ for (int pulse = 0; pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
+ {
+ cr.dd->gpuHaloExchange[d][pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
+ }
}
}
// Runs communicateHaloForces() on the GpuHaloExchange objects stored in
// cr.dd->gpuHaloExchange, forwarding accumulateForces unchanged to each call.
//
// Iteration order (new, '+' side of this hunk): decomposition dimensions are
// visited from cr.dd->ndim - 1 down to 0 and, within each dimension d, pulses
// from cr.dd->comm->cd[d].numPulses() - 1 down to 0. The old ('-') side only
// walked the pulses of dimension 0, also from last to first.
//
// NOTE(review): the loops index gpuHaloExchange[d][pulse] without a bounds
// check, so they presumably rely on one exchange object having been created
// per (dimension, pulse) beforehand - confirm against the setup routine.
void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
{
- for (int pulse = cr.dd->comm->cd[0].numPulses() - 1; pulse >= 0; pulse--)
+ for (int d = cr.dd->ndim - 1; d >= 0; d--)
{
- cr.dd->gpuHaloExchange[pulse]->communicateHaloForces(accumulateForces);
+ for (int pulse = cr.dd->comm->cd[d].numPulses() - 1; pulse >= 0; pulse--)
+ {
+ cr.dd->gpuHaloExchange[d][pulse]->communicateHaloForces(accumulateForces);
+ }
}
}