}
#endif
-void dd_setup_dlb_resource_sharing(t_commrec *cr,
+void dd_setup_dlb_resource_sharing(const t_commrec *cr,
int gpu_id)
{
#if GMX_MPI
* GPU finish. Therefore there wait times need to be averaged over the ranks
* sharing the same GPU. This function sets up the communication for that.
*/
-void dd_setup_dlb_resource_sharing(t_commrec *cr,
+void dd_setup_dlb_resource_sharing(const t_commrec *cr,
int gpu_id);
/*! \brief Cycle counter indices used internally in the domain decomposition */
gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
va_end(ap);
}
+
+void simulationBarrier(const t_commrec *cr)
+{
+ if (PAR(cr))
+ {
+#if GMX_MPI
+ MPI_Barrier(cr->mpi_comm_mysim);
+#endif
+ }
+}
* for all processes.
*/
+//! Make a barrier across all ranks of this simulation
+void simulationBarrier(const t_commrec *cr);
+
#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/basedefinitions.h"
-#if 0
-} /* fixes auto-indentation problems */
-#endif
-
struct gmx_device_info_t;
-/* Possible results of the GPU detection/check.
+/*! \brief Possible results of the GPU detection/check.
*
* The egpuInsane value means that during the sanity checks an error
* occurred that indicates malfunctioning of the device, driver, or
egpuCompatible = 0, egpuNonexistent, egpuIncompatible, egpuIncompatibleClusterSize, egpuInsane, egpuNR
} e_gpu_detect_res_t;
-/* Names of the GPU detection/check results */
+/*! \brief Names of the GPU detection/check results
+ *
+ * \todo Make a proper class enumeration with helper string */
extern const char * const gpu_detect_res_str[egpuNR];
-/* GPU device information -- includes either CUDA or OpenCL devices.
- * The gmx_hardware_detect module initializes it. */
+/*! \brief Information about GPU devices on this physical node.
+ *
+ * Includes either CUDA or OpenCL devices. The gmx_hardware_detect
+ * module initializes it.
+ *
+ * \todo Use a std::vector */
struct gmx_gpu_info_t
{
- gmx_bool bDetectGPUs; /* Did we try to detect GPUs? */
- int n_dev; /* total number of GPU devices detected */
- struct gmx_device_info_t *gpu_dev; /* GPU devices detected in the system (per node) */
- int n_dev_compatible; /* number of compatible GPUs */
+ //! Did we attempt GPU detection?
+ gmx_bool bDetectGPUs;
+ //! Total number of GPU devices detected on this physical node
+ int n_dev;
+ //! Information about each GPU device detected on this physical node
+ gmx_device_info_t *gpu_dev;
+ //! Number of GPU devices detected on this physical node that are compatible.
+ int n_dev_compatible;
};
#endif
~gmx_hw_info_t();
/* Data for our local physical node */
- struct gmx_gpu_info_t gpu_info; /* Information about GPUs detected in the system */
-
- int nthreads_hw_avail; /* Number of hardware threads available; this number
- is based on the number of CPUs reported as available
- by the OS at the time of detection. */
+ //! Information about GPUs detected on this physical node
+ gmx_gpu_info_t gpu_info;
+
+ /*! \brief Number of hardware threads available.
+ *
+ * This number is based on the number of CPUs reported as
+ * available by the OS at the time of detection. */
+ int nthreads_hw_avail;
std::unique_ptr<gmx::CpuInfo> cpuInfo; /* Information about CPU capabilities */
"The -dd or -npme option request a parallel simulation, "
#if !GMX_MPI
"but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
-#else
-#if GMX_THREAD_MPI
+#elif GMX_THREAD_MPI
"but the number of MPI-threads (option -ntmpi) is not set or is 1");
#else
"but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
-#endif
#endif
}
gmx_feenableexcept();
}
- // Build a data structure that expresses which kinds of non-bonded
- // task are handled by this rank.
- //
- // TODO Later, this might become a loop over all registered modules
- // relevant to the mdp inputs, to find those that have such tasks.
- //
// TODO This could move before init_domain_decomposition() as part
// of refactoring that separates the responsibility for duty
// assignment from setup for communication between tasks, and
// that is inconsistent with the presence of actual GPUs on any
// rank, and that is not known to be a problem until the
// duty of the ranks on a node become known.
- //
- // TODO Later we might need the concept of computeTasksOnThisRank,
- // from which we construct gpuTasksOnThisRank.
- //
- // Currently the DD code assigns duty to ranks that can
- // include PP work that currently can be executed on a single
- // GPU, if present and compatible. This has to be coordinated
- // across PP ranks on a node, with possible multiple devices
- // or sharing devices on a node, either from the user
- // selection, or automatically.
- auto haveGpus = !gpuIdsToUse.empty();
- std::vector<GpuTask> gpuTasksOnThisRank;
- if (thisRankHasDuty(cr, DUTY_PP))
- {
- if (useGpuForNonbonded)
- {
- // Note that any bonded tasks on a GPU always accompany a
- // non-bonded task.
- if (haveGpus)
- {
- gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
- }
- else if (nonbondedTarget == TaskTarget::Gpu)
- {
- gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because no GPU is detected.");
- }
- else if (bondedTarget == TaskTarget::Gpu)
- {
- gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because no GPU is detected.");
- }
- }
- }
- // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
- if (EEL_PME(inputrec->coulombtype) && (thisRankHasDuty(cr, DUTY_PME)))
- {
- if (useGpuForPme)
- {
- if (haveGpus)
- {
- gpuTasksOnThisRank.push_back(GpuTask::Pme);
- }
- else if (pmeTarget == TaskTarget::Gpu)
- {
- gmx_fatal(FARGS, "Cannot run PME on a GPU because no GPU is detected.");
- }
- }
- }
- GpuTaskAssignment gpuTaskAssignment;
- try
- {
- // Produce the task assignment for this rank.
- gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
- mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank,
- useGpuForBonded, pmeRunMode);
- }
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
-
- /* Prevent other ranks from continuing after an issue was found
- * and reported as a fatal error.
- *
- * TODO This function implements a barrier so that MPI runtimes
- * can organize an orderly shutdown if one of the ranks has had to
- * issue a fatal error in various code already run. When we have
- * MPI-aware error handling and reporting, this should be
- * improved. */
-#if GMX_MPI
- if (PAR(cr))
- {
- MPI_Barrier(cr->mpi_comm_mysim);
- }
- if (isMultiSim(ms))
- {
- if (SIMMASTER(cr))
- {
- MPI_Barrier(ms->mpi_comm_masters);
- }
- /* We need another barrier to prevent non-master ranks from contiuing
- * when an error occured in a different simulation.
- */
- MPI_Barrier(cr->mpi_comm_mysim);
+ // Produce the task assignment for this rank.
+ GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
+ GpuTaskAssignments gpuTaskAssignments =
+ gpuTaskAssignmentsBuilder.build(gpuIdsToUse,
+ userGpuTaskAssignment,
+ *hwinfo,
+ cr,
+ ms,
+ physicalNodeComm,
+ nonbondedTarget,
+ pmeTarget,
+ bondedTarget,
+ updateTarget,
+ useGpuForNonbonded,
+ useGpuForPme,
+ thisRankHasDuty(cr, DUTY_PP),
+ // TODO cr->duty & DUTY_PME should imply that a PME
+ // algorithm is active, but currently does not.
+ EEL_PME(inputrec->coulombtype) &&
+ thisRankHasDuty(cr, DUTY_PME));
+
+ const bool printHostName = (cr->nnodes > 1);
+ gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode);
+
+ // If the user chose a task assignment, give them some hints
+ // where appropriate.
+ if (!userGpuTaskAssignment.empty())
+ {
+ gpuTaskAssignments.logPerformanceHints(mdlog,
+ ssize(gpuIdsToUse));
}
-#endif
/* Now that we know the setup is consistent, check for efficiency */
- check_resource_division_efficiency(hwinfo, !gpuTaskAssignment.empty(), mdrunOptions.ntompOptionIsSet,
- cr, mdlog);
-
- gmx_device_info_t *nonbondedDeviceInfo = nullptr;
-
- if (thisRankHasDuty(cr, DUTY_PP))
- {
- // This works because only one task of each type is currently permitted.
- auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
- hasTaskType<GpuTask::Nonbonded>);
- if (nbGpuTaskMapping != gpuTaskAssignment.end())
- {
- int nonbondedDeviceId = nbGpuTaskMapping->deviceId_;
- nonbondedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, nonbondedDeviceId);
- init_gpu(nonbondedDeviceInfo);
-
- if (DOMAINDECOMP(cr))
- {
- /* When we share GPUs over ranks, we need to know this for the DLB */
- dd_setup_dlb_resource_sharing(cr, nonbondedDeviceId);
- }
-
- }
- }
-
- gmx_device_info_t *pmeDeviceInfo = nullptr;
+ check_resource_division_efficiency(hwinfo,
+ gpuTaskAssignments.thisRankHasAnyGpuTask(),
+ mdrunOptions.ntompOptionIsSet,
+ cr,
+ mdlog);
+
+ // Get the device handles for the modules, nullptr when no task is assigned.
+ gmx_device_info_t *nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
+ gmx_device_info_t *pmeDeviceInfo = gpuTaskAssignments.initPmeDevice();
+ const bool thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
+
+ // TODO should live in ewald module once its testing is improved
+ //
// Later, this program could contain kernels that might be later
// re-used as auto-tuning progresses, or subsequent simulations
// are invoked.
PmeGpuProgramStorage pmeGpuProgram;
- // This works because only one task of each type is currently permitted.
- auto pmeGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasTaskType<GpuTask::Pme>);
- const bool thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
if (thisRankHasPmeGpuTask)
{
- pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_);
- init_gpu(pmeDeviceInfo);
pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
}
{
return (isMaster && isMasterSim(ms));
}
+
+void multiSimBarrier(const gmx_multisim_t *ms)
+{
+ if (isMultiSim(ms))
+ {
+#if GMX_MPI
+ if (ms->mpi_comm_masters != MPI_COMM_NULL)
+ {
+ MPI_Barrier(ms->mpi_comm_masters);
+ }
+#endif
+ }
+}
bool isMasterSimMasterRank(const gmx_multisim_t *ms,
bool isMaster);
+//! Make a barrier across all multi-simulation master ranks
+void multiSimBarrier(const gmx_multisim_t *ms);
+
#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <numeric>
#include <vector>
+#include "gromacs/taskassignment/decidegpuusage.h"
+#include "gromacs/taskassignment/taskassignment.h"
+#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/physicalnodecommunicator.h"
namespace gmx
{
+std::vector<GpuTask>
+findGpuTasksOnThisRank(const bool haveGpusOnThisPhysicalNode,
+ const TaskTarget nonbondedTarget,
+ const TaskTarget pmeTarget,
+ const TaskTarget bondedTarget,
+ const TaskTarget updateTarget,
+ const bool useGpuForNonbonded,
+ const bool useGpuForPme,
+ const bool rankHasPpTask,
+ const bool rankHasPmeTask)
+{
+ std::vector<GpuTask> gpuTasksOnThisRank;
+ if (rankHasPpTask)
+ {
+ if (useGpuForNonbonded)
+ {
+ // Note that any bonded tasks on a GPU always accompany a
+ // non-bonded task.
+ if (haveGpusOnThisPhysicalNode)
+ {
+ gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
+ }
+ else if (nonbondedTarget == TaskTarget::Gpu)
+ {
+ gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because no GPU is detected.");
+ }
+ else if (bondedTarget == TaskTarget::Gpu)
+ {
+ gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because no GPU is detected.");
+ }
+ else if (updateTarget == TaskTarget::Gpu)
+ {
+ gmx_fatal(FARGS, "Cannot run coordinate update on a GPU because no GPU is detected.");
+ }
+ }
+ }
+ if (rankHasPmeTask)
+ {
+ if (useGpuForPme)
+ {
+ if (haveGpusOnThisPhysicalNode)
+ {
+ gpuTasksOnThisRank.push_back(GpuTask::Pme);
+ }
+ else if (pmeTarget == TaskTarget::Gpu)
+ {
+ gmx_fatal(FARGS, "Cannot run PME on a GPU because no GPU is detected.");
+ }
+ }
+ }
+ return gpuTasksOnThisRank;
+}
+
namespace
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_TASKASSIGNMENT_FINDALLGPUTASKS_H
#define GMX_TASKASSIGNMENT_FINDALLGPUTASKS_H
-#include "gromacs/taskassignment/taskassignment.h"
-#include "gromacs/utility/arrayref.h"
+#include <vector>
namespace gmx
{
+enum class GpuTask;
+enum class TaskTarget;
class PhysicalNodeCommunicator;
+template <typename T> class ArrayRef;
+//! Container of compute tasks suitable to run on a GPU e.g. on each rank of a node.
+using GpuTasksOnRanks = std::vector< std::vector<GpuTask> >;
+
+/*! \brief Returns container of all tasks on this rank
+ * that are eligible for GPU execution.
+ *
+ * \param[in] haveGpusOnThisPhysicalNode Whether there are any GPUs on this physical node.
+ * \param[in] nonbondedTarget The user's choice for mdrun -nb for where to assign
+ * short-ranged nonbonded interaction tasks.
+ * \param[in] pmeTarget The user's choice for mdrun -pme for where to assign
+ * long-ranged PME nonbonded interaction tasks.
+ * \param[in] bondedTarget The user's choice for mdrun -bonded for where to assign tasks.
+ * \param[in] updateTarget The user's choice for mdrun -update for where to assign tasks.
+ * \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions.
+ * \param[in] useGpuForPme Whether GPUs will be used for PME interactions.
+ * \param[in] rankHasPpTask Whether this rank has a PP task
+ * \param[in] rankHasPmeTask Whether this rank has a PME task
+ */
+std::vector<GpuTask>
+findGpuTasksOnThisRank(bool haveGpusOnThisPhysicalNode,
+ TaskTarget nonbondedTarget,
+ TaskTarget pmeTarget,
+ TaskTarget bondedTarget,
+ TaskTarget updateTarget,
+ bool useGpuForNonbonded,
+ bool useGpuForPme,
+ bool rankHasPpTask,
+ bool rankHasPmeTask);
/*! \brief Returns container of all tasks on all ranks of this node
* that are eligible for GPU execution.
#include "gromacs/ewald/pme.h"
#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/taskassignment/taskassignment.h"
+#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/stringutil.h"
* GPUs used (per node) can be different from the number of GPU IDs
* used.
*/
-size_t countUniqueGpuIdsUsed(const GpuTaskAssignments &gpuTaskAssignmentOnRanksOfThisNode)
+size_t
+countUniqueGpuIdsUsed(ArrayRef<const GpuTaskAssignment> gpuTaskAssignmentOnRanksOfThisNode)
{
std::set<int> uniqueIds;
for (const auto &assignmentsOnRank : gpuTaskAssignmentOnRanksOfThisNode)
} // namespace
void
-reportGpuUsage(const MDLogger &mdlog,
- const GpuTaskAssignments &gpuTaskAssignmentOnRanksOfThisNode,
- size_t numGpuTasksOnThisNode,
- size_t numRanks,
- bool bPrintHostName,
- bool useGpuForBonded,
- PmeRunMode pmeRunMode)
+reportGpuUsage(const MDLogger &mdlog,
+ ArrayRef<const GpuTaskAssignment> gpuTaskAssignmentOnRanksOfThisNode,
+ size_t numGpuTasksOnThisNode,
+ size_t numRanks,
+ bool printHostName,
+ bool useGpuForBonded,
+ PmeRunMode pmeRunMode)
{
size_t numGpusInUse = countUniqueGpuIdsUsed(gpuTaskAssignmentOnRanksOfThisNode);
if (numGpusInUse == 0)
}
bool bPluralGpus = numGpusInUse > 1;
- if (bPrintHostName)
+ if (printHostName)
{
char host[STRLEN];
gmx_gethostname(host, STRLEN);
#include <cstdlib>
-#include "gromacs/taskassignment/taskassignment.h"
+#include <vector>
enum class PmeRunMode;
{
class MDLogger;
+struct GpuTaskMapping;
+template <typename T> class ArrayRef;
+using GpuTaskAssignment = std::vector <GpuTaskMapping>;
/*! \brief Log a report on how GPUs are being used on
* the ranks of the physical node of rank 0 of the simulation.
* \param[in] gpuTaskAssignmentOnRanksOfThisNode The selected GPU IDs.
* \param[in] numGpuTasksOnThisNode The number of GPU tasks on this node.
* \param[in] numPpRanks Number of PP ranks on this node
- * \param[in] bPrintHostName Print the hostname in the usage information
+ * \param[in] printHostName Print the hostname in the usage information
* \param[in] useGpuForBonded Whether GPU PP tasks will do bonded work on the GPU
* \param[in] pmeRunMode Describes the execution of PME tasks
*
* \throws std::bad_alloc if out of memory */
void
-reportGpuUsage(const MDLogger &mdlog,
- const GpuTaskAssignments &gpuTaskAssignmentOnRanksOfThisNode,
- size_t numGpuTasksOnThisNode,
- size_t numPpRanks,
- bool bPrintHostName,
- bool useGpuForBonded,
- PmeRunMode pmeRunMode);
+reportGpuUsage(const MDLogger &mdlog,
+ ArrayRef<const GpuTaskAssignment> gpuTaskAssignmentOnRanksOfThisNode,
+ size_t numGpuTasksOnThisNode,
+ size_t numPpRanks,
+ bool printHostName,
+ bool useGpuForBonded,
+ PmeRunMode pmeRunMode);
} // namespace gmx
#include "taskassignment.h"
-#include "config.h"
-
+#include <algorithm>
#include <string>
#include <vector>
+#include "gromacs/domdec/domdec.h"
+#include "gromacs/gmxlib/network.h"
+#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/mdrunutility/multisim.h"
#include "gromacs/mdtypes/commrec.h"
* that are eligible to run on GPUs.
* \param[in] gpuIds The user-supplied GPU IDs.
*/
-GpuTaskAssignments
+std::vector<GpuTaskAssignment>
buildTaskAssignment(const GpuTasksOnRanks &gpuTasksOnRanksOfThisNode,
ArrayRef<const int> gpuIds)
{
- GpuTaskAssignments gpuTaskAssignmentOnRanksOfThisNode(gpuTasksOnRanksOfThisNode.size());
+ std::vector<GpuTaskAssignment> gpuTaskAssignmentOnRanksOfThisNode(gpuTasksOnRanksOfThisNode.size());
// Loop over the ranks on this node, and the tasks on each
// rank. For each task, take the next device ID from those
*
* Sharing GPUs among multiple ranks is possible via either user or
* automated selection. */
-bool isAnyGpuSharedBetweenRanks(const GpuTaskAssignments &gpuTaskAssignments)
+bool isAnyGpuSharedBetweenRanks(ArrayRef<const GpuTaskAssignment> gpuTaskAssignments)
{
// Loop over all ranks i, looking on all higher ranks j whether
// any tasks on them share GPU device IDs.
return false;
}
-//! Logs to \c mdlog information that may help a user learn how to let mdrun make a task assignment that runs faster.
-void logPerformanceHints(const MDLogger &mdlog,
- size_t numCompatibleGpus,
- size_t numGpuTasksOnThisNode,
- const GpuTaskAssignments &gpuTaskAssignments)
+} // namespace
+
+void
+GpuTaskAssignments::logPerformanceHints(const MDLogger &mdlog,
+ size_t numCompatibleGpusOnThisNode)
{
- if (numCompatibleGpus > numGpuTasksOnThisNode)
+ if (numCompatibleGpusOnThisNode > numGpuTasksOnThisNode_)
{
/* TODO In principle, this warning could be warranted only on
* some nodes, but we lack the infrastructure to do a good job
"available on that node are unused, which might not be optimal.");
}
- if (isAnyGpuSharedBetweenRanks(gpuTaskAssignments))
+ if (isAnyGpuSharedBetweenRanks(assignmentForAllRanksOnThisNode_))
{
GMX_LOG(mdlog.warning).asParagraph().
appendText("NOTE: You assigned the same GPU ID(s) to multiple ranks, which is a good idea if you have measured the performance of alternatives.");
}
}
+namespace
+{
+
//! Counts all the GPU tasks on this node.
size_t countGpuTasksOnThisNode(const GpuTasksOnRanks &gpuTasksOnRanksOfThisNode)
{
} // namespace
-GpuTaskAssignments::value_type
-runTaskAssignment(const std::vector<int> &gpuIdsToUse,
- const std::vector<int> &userGpuTaskAssignment,
- const gmx_hw_info_t &hardwareInfo,
- const MDLogger &mdlog,
- const t_commrec *cr,
- const gmx_multisim_t *ms,
- const PhysicalNodeCommunicator &physicalNodeComm,
- const std::vector<GpuTask> &gpuTasksOnThisRank,
- bool useGpuForBonded,
- PmeRunMode pmeRunMode)
+GpuTaskAssignmentsBuilder::GpuTaskAssignmentsBuilder() = default;
+
+GpuTaskAssignments
+GpuTaskAssignmentsBuilder::build(const std::vector<int> &gpuIdsToUse,
+ const std::vector<int> &userGpuTaskAssignment,
+ const gmx_hw_info_t &hardwareInfo,
+ const t_commrec *cr,
+ const gmx_multisim_t *ms,
+ const PhysicalNodeCommunicator &physicalNodeComm,
+ const TaskTarget nonbondedTarget,
+ const TaskTarget pmeTarget,
+ const TaskTarget bondedTarget,
+ const TaskTarget updateTarget,
+ const bool useGpuForNonbonded,
+ const bool useGpuForPme,
+ bool rankHasPpTask,
+ bool rankHasPmeTask)
{
+ size_t numRanksOnThisNode = physicalNodeComm.size_;
+ std::vector<GpuTask> gpuTasksOnThisRank = findGpuTasksOnThisRank(!gpuIdsToUse.empty(),
+ nonbondedTarget,
+ pmeTarget,
+ bondedTarget,
+ updateTarget,
+ useGpuForNonbonded,
+ useGpuForPme,
+ rankHasPpTask,
+ rankHasPmeTask);
/* Communicate among ranks on this node to find each task that can
* be executed on a GPU, on each rank. */
- auto gpuTasksOnRanksOfThisNode = findAllGpuTasksOnThisNode(gpuTasksOnThisRank,
- physicalNodeComm);
- auto numGpuTasksOnThisNode = countGpuTasksOnThisNode(gpuTasksOnRanksOfThisNode);
+ auto gpuTasksOnRanksOfThisNode = findAllGpuTasksOnThisNode(gpuTasksOnThisRank,
+ physicalNodeComm);
+ size_t numGpuTasksOnThisNode = countGpuTasksOnThisNode(gpuTasksOnRanksOfThisNode);
- GpuTaskAssignments taskAssignmentOnRanksOfThisNode;
+ std::vector<GpuTaskAssignment> taskAssignmentOnRanksOfThisNode;
try
{
// Use the GPU IDs from the user if they supplied
ArrayRef<const int> compatibleGpusToUse = gpuIdsToUse;
// enforce the single device/rank restriction
- if (physicalNodeComm.size_ == 1 && !compatibleGpusToUse.empty())
+ if (numRanksOnThisNode == 1 && !compatibleGpusToUse.empty())
{
compatibleGpusToUse = compatibleGpusToUse.subArray(0, 1);
}
printFatalErrorMessage(stderr, ex);
}
- if (PAR(cr))
- {
-#if GMX_MPI
- MPI_Barrier(cr->mpi_comm_mysim);
-#endif
- }
- if (isMultiSim(ms))
+ gmx_exit_on_fatal_error(ExitType_Abort, 1);
+ }
+ // TODO This implements a global barrier so that MPI runtimes can
+ // organize an orderly shutdown if one of the ranks has had to
+ // issue a fatal error after an exception detected only on one
+ // rank. When we have MPI-aware error handling and reporting, this
+ // should be improved.
+ multiSimBarrier(ms);
+ simulationBarrier(cr);
+
+ // TODO There is no check that mdrun -nb gpu or -pme gpu or
+ // -gpu_id is actually being implemented such that nonbonded tasks
+ // are being run on compatible GPUs, on all applicable ranks. That
+ // would require communication.
+
+ GpuTaskAssignments gpuTaskAssignments(hardwareInfo);
+ gpuTaskAssignments.assignmentForAllRanksOnThisNode_ = taskAssignmentOnRanksOfThisNode;
+ gpuTaskAssignments.indexOfThisRank_ = physicalNodeComm.rank_;
+ gpuTaskAssignments.numGpuTasksOnThisNode_ = numGpuTasksOnThisNode;
+ gpuTaskAssignments.numRanksOnThisNode_ = numRanksOnThisNode;
+ return gpuTaskAssignments;
+}
+
+GpuTaskAssignments::GpuTaskAssignments(const gmx_hw_info_t &hardwareInfo)
+ : hardwareInfo_(hardwareInfo)
+{
+}
+
+void
+GpuTaskAssignments::reportGpuUsage(const MDLogger &mdlog,
+ bool printHostName,
+ bool useGpuForBonded,
+ PmeRunMode pmeRunMode)
+{
+ gmx::reportGpuUsage(mdlog,
+ assignmentForAllRanksOnThisNode_,
+ numGpuTasksOnThisNode_,
+ numRanksOnThisNode_,
+ printHostName,
+ useGpuForBonded,
+ pmeRunMode);
+}
+
+gmx_device_info_t *
+GpuTaskAssignments::initNonbondedDevice(const t_commrec *cr) const
+{
+ gmx_device_info_t *deviceInfo = nullptr;
+ const GpuTaskAssignment &gpuTaskAssignment =
+ assignmentForAllRanksOnThisNode_[indexOfThisRank_];
+
+ // This works because only one task of each type per rank is currently permitted.
+ auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
+ hasTaskType<GpuTask::Nonbonded>);
+ if (nbGpuTaskMapping != gpuTaskAssignment.end())
+ {
+ int deviceId = nbGpuTaskMapping->deviceId_;
+ deviceInfo = getDeviceInfo(hardwareInfo_.gpu_info, deviceId);
+ init_gpu(deviceInfo);
+
+        // TODO Setting up this sharing should probably be part of
+        // init_domain_decomposition after further refactoring.
+ if (DOMAINDECOMP(cr))
{
-#if GMX_MPI
- MPI_Barrier(ms->mpi_comm_masters);
-#endif
+ /* When we share GPUs over ranks, we need to know this for the DLB */
+ dd_setup_dlb_resource_sharing(cr, deviceId);
}
-
- gmx_exit_on_fatal_error(ExitType_Abort, 1);
}
+ return deviceInfo;
+}
- reportGpuUsage(mdlog, taskAssignmentOnRanksOfThisNode,
- numGpuTasksOnThisNode, physicalNodeComm.size_, cr->nnodes > 1,
- useGpuForBonded, pmeRunMode);
+gmx_device_info_t *
+GpuTaskAssignments::initPmeDevice() const
+{
+ gmx_device_info_t *deviceInfo = nullptr;
+ const GpuTaskAssignment &gpuTaskAssignment =
+ assignmentForAllRanksOnThisNode_[indexOfThisRank_];
- // If the user chose a task assignment, give them some hints where appropriate.
- if (!userGpuTaskAssignment.empty())
+ // This works because only one task of each type is currently permitted.
+ auto pmeGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
+ hasTaskType<GpuTask::Pme>);
+ const bool thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
+ if (thisRankHasPmeGpuTask)
{
- logPerformanceHints(mdlog, gpuIdsToUse.size(),
- numGpuTasksOnThisNode,
- taskAssignmentOnRanksOfThisNode);
+ deviceInfo = getDeviceInfo(hardwareInfo_.gpu_info, pmeGpuTaskMapping->deviceId_);
+ init_gpu(deviceInfo);
}
+ return deviceInfo;
+}
- return taskAssignmentOnRanksOfThisNode[physicalNodeComm.rank_];
+bool
+GpuTaskAssignments::thisRankHasPmeGpuTask() const
+{
+ const GpuTaskAssignment &gpuTaskAssignment =
+ assignmentForAllRanksOnThisNode_[indexOfThisRank_];
- // TODO There is no check that mdrun -nb gpu or -pme gpu or
- // -gpu_id is actually being implemented such that nonbonded tasks
- // are being run on compatible GPUs, on all applicable ranks. That
- // would require communication.
+ auto pmeGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasTaskType<GpuTask::Pme>);
+ const bool thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
+
+ return thisRankHasPmeGpuTask;
+}
+
+bool
+GpuTaskAssignments::thisRankHasAnyGpuTask() const
+{
+ const GpuTaskAssignment &gpuTaskAssignment =
+ assignmentForAllRanksOnThisNode_[indexOfThisRank_];
+
+ const bool thisRankHasAnyGpuTask = !gpuTaskAssignment.empty();
+ return thisRankHasAnyGpuTask;
}
} // namespace gmx
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <vector>
+#include "gromacs/utility/basedefinitions.h"
+
+struct gmx_device_info_t;
struct gmx_hw_info_t;
struct gmx_multisim_t;
struct t_commrec;
namespace gmx
{
+enum class TaskTarget;
class MDLogger;
class PhysicalNodeCommunicator;
//! Container of GPU tasks on a rank, specifying the task type and GPU device ID, e.g. potentially ready for consumption by the modules on that rank.
using GpuTaskAssignment = std::vector <GpuTaskMapping>;
-//! Container of compute tasks suitable to run on a GPU e.g. on each rank of a node.
-using GpuTasksOnRanks = std::vector< std::vector<GpuTask> >;
-//! Container of RankGpuTaskAssignments e.g. for all ranks on a node.
-using GpuTaskAssignments = std::vector<GpuTaskAssignment>;
-/*! \brief Coordinate the final stages of task assignment and
- * reporting, and return the assignment for this rank.
+class GpuTaskAssignments;
+
+/*! \libinternal
+ * \brief Builder for the GpuTaskAssignments for all ranks on this
+ * node.
+ *
+ * This will coordinate the final stages of task assignment and
+ * reporting, and build the GpuTaskAssignments object used to
+ * configure the modules that might run tasks on GPUs.
*
* Communicates between ranks on a node to coordinate task assignment
* between them onto available hardware, e.g. accelerators.
*
- * Releases the taskAssigner once its work is complete.
+ * \todo Later, this might become a loop over all registered modules
+ * relevant to the mdp inputs, to find those that have such tasks.
*
- * \param[in] gpuIdsToUse The compatible GPUs that the user permitted us to use.
- * \param[in] userGpuTaskAssignment The user-specified assignment of GPU tasks to device IDs.
- * \param[in] hardwareInfo The detected hardware
- * \param[in] mdlog Logging object to write to.
- * \param[in] cr Communication object.
- * \param[in] ms Multi-simulation handler.
- * \param[in] physicalNodeComm Communication object for this physical node.
- * \param[in] gpuTasksOnThisRank Information about what GPU tasks
- * exist on this rank.
- * \param[in] useGpuForBonded Whether GPU PP tasks will do bonded work on the GPU
- * \param[in] pmeRunMode Describes the execution of PME tasks
+ * \todo Later we might need the concept of computeTasksOnThisRank,
+ * from which we construct gpuTasksOnThisRank.
*
- * \returns A GPU task assignment for this rank.
+ * Currently the DD code assigns duty to ranks that can
+ * include PP work that currently can be executed on a single
+ * GPU, if present and compatible. This has to be coordinated
+ * across PP ranks on a node, with possible multiple devices
+ * or sharing devices on a node, either from the user
+ * selection, or automatically. */
+class GpuTaskAssignmentsBuilder
+{
+ public:
+ //! Constructor
+ GpuTaskAssignmentsBuilder();
+
+ /*! \brief Builds a GpuTaskAssignments
+ *
+ * This method reconciles
+ *
+ * - user mdrun command-line options,
+ * - the results of hardware detection,
+ * - the duty assigned by the DD setup,
+ * - the requested simulation modules, and
+ * - the possible existence of multi-simulations
+ *
+ * to assign the GPUs on each physical node to the tasks on
+ * the ranks of that node.
+ *
+ * \param[in] gpuIdsToUse The compatible GPUs that the user permitted us to use.
+ * \param[in] userGpuTaskAssignment The user-specified assignment of GPU tasks to device IDs.
+ * \param[in] hardwareInfo The detected hardware
+ * \param[in] cr Communication object.
+ * \param[in] ms Multi-simulation handler.
+ * \param[in] physicalNodeComm Communication object for this physical node.
+ * \param[in] nonbondedTarget The user's choice for mdrun -nb for where to assign
+ * short-ranged nonbonded interaction tasks.
+ * \param[in] pmeTarget The user's choice for mdrun -pme for where to assign
+ * long-ranged PME nonbonded interaction tasks.
+ * \param[in] bondedTarget The user's choice for mdrun -bonded for where to assign tasks.
+ * \param[in] updateTarget The user's choice for mdrun -update for where to assign tasks.
+ * \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions.
+ * \param[in] useGpuForPme Whether GPUs will be used for PME interactions.
+ * \param[in] rankHasPpTask Whether this rank has a PP task
+ * \param[in] rankHasPmeTask Whether this rank has a PME task
+ *
+ * \throws std::bad_alloc If out of memory.
+ * InconsistentInputError If user and/or detected inputs are inconsistent.
+ */
+ GpuTaskAssignments build(const std::vector<int> &gpuIdsToUse,
+ const std::vector<int> &userGpuTaskAssignment,
+ const gmx_hw_info_t &hardwareInfo,
+ const t_commrec *cr,
+ const gmx_multisim_t *ms,
+ const PhysicalNodeCommunicator &physicalNodeComm,
+ TaskTarget nonbondedTarget,
+ TaskTarget pmeTarget,
+ TaskTarget bondedTarget,
+ TaskTarget updateTarget,
+ bool useGpuForNonbonded,
+ bool useGpuForPme,
+ bool rankHasPpTask,
+ bool rankHasPmeTask);
+};
+
+/*! \libinternal
+ * \brief Contains the GPU task assignment for all ranks on this
+ * physical node.
*
- * \throws std::bad_alloc If out of memory.
- * InconsistentInputError If user and/or detected inputs are inconsistent.
- */
-GpuTaskAssignments::value_type
-runTaskAssignment(const std::vector<int> &gpuIdsToUse,
- const std::vector<int> &userGpuTaskAssignment,
- const gmx_hw_info_t &hardwareInfo,
- const MDLogger &mdlog,
- const t_commrec *cr,
- const gmx_multisim_t *ms,
- const PhysicalNodeCommunicator &physicalNodeComm,
- const std::vector<GpuTask> &gpuTasksOnThisRank,
- bool useGpuForBonded,
- PmeRunMode pmeRunMode);
+ * This can be used to configure the modules that might run tasks on
+ * GPUs.
+ *
+ * This assignment is made by a GpuTaskAssignmentsBuilder object. */
+class GpuTaskAssignments
+{
+ public:
+ //! Public move constructor to use with the builder
+ GpuTaskAssignments(GpuTaskAssignments &&source) noexcept = default;
+ private:
+ // Let the builder handle construction
+ friend class GpuTaskAssignmentsBuilder;
+ //! Private constructor so only the builder can construct
+ GpuTaskAssignments(const gmx_hw_info_t &hardwareInfo);
+ /*! \brief Information about hardware on this physical node
+ *
+ * The lifetime of the object referred to must exceed that
+ * of this object. */
+ const gmx_hw_info_t &hardwareInfo_;
+ //! The GPU task assignment for all ranks on this node
+ std::vector<GpuTaskAssignment> assignmentForAllRanksOnThisNode_;
+ /*! \brief The index of this rank within those on this node.
+ *
+ * This is useful for indexing into \c
+ * assignmentForAllRanksOnThisNode_. */
+ index indexOfThisRank_ = -1;
+ //! Number of GPU tasks on this node.
+ size_t numGpuTasksOnThisNode_ = 0;
+ //! Number of ranks on this physical node.
+ size_t numRanksOnThisNode_ = 0;
+ public:
+ /*! \brief Log a report on how GPUs are being used on
+ * the ranks of the physical node of rank 0 of the simulation.
+ *
+ * \todo It could be useful to report also whether any nodes differed,
+ * and in what way.
+ *
+ * \param[in] mdlog Logging object.
+ * \param[in] printHostName Print the hostname in the usage information
+ * \param[in] useGpuForBonded Whether GPU PP tasks will do bonded work on the GPU
+ * \param[in] pmeRunMode Describes the execution of PME tasks
+ *
+ * \throws std::bad_alloc if out of memory */
+ void
+ reportGpuUsage(const MDLogger &mdlog,
+ bool printHostName,
+ bool useGpuForBonded,
+ PmeRunMode pmeRunMode);
+ /*! \brief Logs to \c mdlog information that may help a user
+ * learn how to let mdrun make a task assignment that runs
+ * faster.
+ *
+ * \param[in] mdlog Logging object.
+ * \param[in] numCompatibleGpusOnThisNode The number of compatible GPUs on this node.
+ */
+ void logPerformanceHints(const MDLogger &mdlog,
+ size_t numCompatibleGpusOnThisNode);
+ /*! \brief Return handle to the initialized GPU to use for the
+ * nonbonded task on this rank, if any.
+ *
+ * Returns nullptr if no such task is assigned to this rank.
+ *
+ * \todo This also sets up DLB for device sharing, where
+ * appropriate, but that responsibility should move
+ * elsewhere. */
+ gmx_device_info_t *initNonbondedDevice(const t_commrec *cr) const;
+ /*! \brief Return handle to the initialized GPU to use for the
+ * PME task on this rank, if any.
+ *
+ * Returns nullptr if no such task is assigned to this rank. */
+ gmx_device_info_t *initPmeDevice() const;
+ //! Return whether this rank has a PME task running on a GPU
+ bool thisRankHasPmeGpuTask() const;
+ //! Return whether this rank has any task running on a GPU
+ bool thisRankHasAnyGpuTask() const;
+};
//! Function for whether the task of \c mapping has value \c TaskType.
template<GpuTask TaskType>
parseUserGpuIdString(const std::string &gpuIdString);
/*! \brief Implement GPU ID selection by returning the available GPU
- * IDs that are compatible.
+ * IDs on this physical node that are compatible.
*
* If the string supplied by the user is empty, then return the IDs of
- * all compatible GPUs. Otherwise, check the user specified compatible
- * GPUs and return their IDs.
+ * all compatible GPUs on this physical node. Otherwise, check the
+ * user specified compatible GPUs and return their IDs.
*
- * \param[in] gpuInfo Information detected about GPUs
+ * \param[in] gpuInfo Information detected about GPUs on this physical node
* \param[in] gpuIdsAvailableString String like "013" or "0,1,3" typically
* supplied by the user to mdrun -gpu_id.
* Must contain only unique decimal digits, or only decimal
* comma is accceptable (and required to specify a
* single ID that is larger than 9).
*
- * \returns A vector of unique compatible GPU IDs.
+ * \returns A vector of unique compatible GPU IDs on this physical node.
*
* \throws std::bad_alloc If out of memory.
* InvalidInputError If an invalid character is found (ie not a digit or ',') or if