Combine initialization routines for GPUs assigned to PME and Nonbonded
authorArtem Zhmurov <zhmurov@gmail.com>
Thu, 13 Feb 2020 15:05:51 +0000 (16:05 +0100)
committerSzilárd Páll <pall.szilard@gmail.com>
Fri, 13 Mar 2020 11:42:34 +0000 (12:42 +0100)
The subroutines that do the initialization share a lot of logic. Also,
the current setup does not allow to have more than one GPU assigned to
a single rank. This make it natural to keep one handle to the GPU
device, if there is a GPU assigned to the rank.

Change-Id: I9177e1a6c00cf6efa7b58370b8fa6220c8fd4496

src/gromacs/mdrun/runner.cpp
src/gromacs/taskassignment/taskassignment.cpp
src/gromacs/taskassignment/taskassignment.h

index eddbe5efa74c4ca241683bd3a6ce11061f2528d6..65312657d8380b184e478ed0a84d2dd4903b26f6 100644 (file)
@@ -1141,18 +1141,16 @@ int Mdrunner::mdrunner()
             EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
 
     // Get the device handles for the modules, nullptr when no task is assigned.
-    // TODO: There should be only one DeviceInformation per rank.
-    DeviceInformation* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
-    DeviceInformation* pmeDeviceInfo       = gpuTaskAssignments.initPmeDevice();
-
+    int                            deviceId      = -1;
+    DeviceInformation*             deviceInfo    = gpuTaskAssignments.initDevice(&deviceId);
     std::unique_ptr<DeviceContext> deviceContext = nullptr;
-    if (pmeDeviceInfo)
-    {
-        deviceContext = std::make_unique<DeviceContext>(*pmeDeviceInfo);
-    }
-    else if (nonbondedDeviceInfo)
+    if (deviceInfo != nullptr)
     {
-        deviceContext = std::make_unique<DeviceContext>(*nonbondedDeviceInfo);
+        if (DOMAINDECOMP(cr) && thisRankHasDuty(cr, DUTY_PP))
+        {
+            dd_setup_dlb_resource_sharing(cr, deviceId);
+        }
+        deviceContext = std::make_unique<DeviceContext>(*deviceInfo);
     }
 
     // TODO Initialize GPU streams here.
@@ -1361,7 +1359,7 @@ int Mdrunner::mdrunner()
                     cr->mpi_comm_mysim, cr->dd->pme_nodeid, *deviceContext);
         }
 
-        fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec, fr, cr, *hwinfo, nonbondedDeviceInfo,
+        fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec, fr, cr, *hwinfo, deviceInfo,
                                         fr->deviceContext, &mtop, box, wcycle);
         if (useGpuForBonded)
         {
@@ -1452,12 +1450,12 @@ int Mdrunner::mdrunner()
     if (thisRankHasPmeGpuTask)
     {
         GMX_RELEASE_ASSERT(
-                pmeDeviceInfo != nullptr,
+                deviceInfo != nullptr,
                 "Device information can not be nullptr when building PME GPU program object.");
         GMX_RELEASE_ASSERT(
                 deviceContext != nullptr,
                 "Device context can not be nullptr when building PME GPU program object.");
-        pmeGpuProgram = buildPmeGpuProgram(*pmeDeviceInfo, *deviceContext);
+        pmeGpuProgram = buildPmeGpuProgram(*deviceInfo, *deviceContext);
     }
 
     /* Initiate PME if necessary,
@@ -1486,7 +1484,7 @@ int Mdrunner::mdrunner()
                 pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec, nChargePerturbed != 0,
                                        nTypePerturbed != 0, mdrunOptions.reproducible, ewaldcoeff_q,
                                        ewaldcoeff_lj, gmx_omp_nthreads_get(emntPME), pmeRunMode,
-                                       nullptr, pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
+                                       nullptr, deviceInfo, pmeGpuProgram.get(), mdlog);
             }
             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
         }
@@ -1699,8 +1697,7 @@ int Mdrunner::mdrunner()
         physicalNodeComm.barrier();
     }
 
-    free_gpu(nonbondedDeviceInfo);
-    free_gpu(pmeDeviceInfo);
+    free_gpu(deviceInfo);
     deviceContext.reset(nullptr);
     sfree(fcd);
 
index 5a62972d61001ad59172b8538fdab1f2c6c09860..8e0ace91cf50097bf28b4d5b1d358f107af0df59 100644 (file)
@@ -400,43 +400,40 @@ void GpuTaskAssignments::reportGpuUsage(const MDLogger& mdlog,
                         numRanksOnThisNode_, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
 }
 
-DeviceInformation* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr) const
+/*! \brief Function for whether the task of \c mapping has value \c TaskType.
+ *
+ * \param[in] mapping  Current GPU task mapping.
+ * \returns If \c TaskType task was assigned to the \c mapping.
+ */
+template<GpuTask TaskType>
+static bool hasTaskType(const GpuTaskMapping& mapping)
 {
-    DeviceInformation*       deviceInfo        = nullptr;
-    const GpuTaskAssignment& gpuTaskAssignment = assignmentForAllRanksOnThisNode_[indexOfThisRank_];
-
-    // This works because only one task of each type per rank is currently permitted.
-    auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
-                                         hasTaskType<GpuTask::Nonbonded>);
-    if (nbGpuTaskMapping != gpuTaskAssignment.end())
-    {
-        int deviceId = nbGpuTaskMapping->deviceId_;
-        deviceInfo   = getDeviceInfo(hardwareInfo_.gpu_info, deviceId);
-        init_gpu(deviceInfo);
+    return mapping.task_ == TaskType;
+}
 
-        // TODO Setting up this sharing should probably part of
-        // init_domain_decomposition after further refactoring.
-        if (DOMAINDECOMP(cr))
-        {
-            /* When we share GPUs over ranks, we need to know this for the DLB */
-            dd_setup_dlb_resource_sharing(cr, deviceId);
-        }
-    }
-    return deviceInfo;
+/*! \brief Function for whether the \c mapping has the GPU PME or Nonbonded task.
+ *
+ * \param[in] mapping  Current GPU task mapping.
+ * \returns If PME on Nonbonded GPU task was assigned to this mapping.
+ */
+static bool hasPmeOrNonbondedTask(const GpuTaskMapping& mapping)
+{
+    return hasTaskType<GpuTask::Pme>(mapping) || hasTaskType<GpuTask::Nonbonded>(mapping);
 }
 
-DeviceInformation* GpuTaskAssignments::initPmeDevice() const
+DeviceInformation* GpuTaskAssignments::initDevice(int* deviceId) const
 {
     DeviceInformation*       deviceInfo        = nullptr;
     const GpuTaskAssignment& gpuTaskAssignment = assignmentForAllRanksOnThisNode_[indexOfThisRank_];
 
-    // This works because only one task of each type is currently permitted.
-    auto       pmeGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
-                                          hasTaskType<GpuTask::Pme>);
-    const bool thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
-    if (thisRankHasPmeGpuTask)
+    // This works because only one task of each type per rank is currently permitted.
+    auto gpuTaskMapping =
+            std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasPmeOrNonbondedTask);
+
+    if (gpuTaskMapping != gpuTaskAssignment.end())
     {
-        deviceInfo = getDeviceInfo(hardwareInfo_.gpu_info, pmeGpuTaskMapping->deviceId_);
+        *deviceId  = gpuTaskMapping->deviceId_;
+        deviceInfo = getDeviceInfo(hardwareInfo_.gpu_info, *deviceId);
         init_gpu(deviceInfo);
     }
     return deviceInfo;
index c6ac9ac9de6b8b8730545d4197efc6572b8626ec..7e661e21668ca91f6f6d6dda19f28ef5bc52f7de 100644 (file)
@@ -239,33 +239,20 @@ public:
      * \param[in]  numCompatibleGpusOnThisNode  The number of compatible GPUs on this node.
      * */
     void logPerformanceHints(const MDLogger& mdlog, size_t numCompatibleGpusOnThisNode);
-    /*! \brief Return handle to the initialized GPU to use for the
-     * nonbonded task on this rank, if any.
+    /*! \brief Return handle to the initialized GPU to use in the this rank.
      *
-     * Returns nullptr if no such task is assigned to this rank.
+     * \param[out] deviceId Index of the assigned device.
      *
-     * \todo This also sets up DLB for device sharing, where
-     * appropriate, but that responsbility should move
-     * elsewhere. */
-    DeviceInformation* initNonbondedDevice(const t_commrec* cr) const;
-    /*! \brief Return handle to the initialized GPU to use for the
-     * PME task on this rank, if any.
-     *
-     * Returns nullptr if no such task is assigned to this rank. */
-    DeviceInformation* initPmeDevice() const;
+     * \returns Device information on the selected devicce. Returns nullptr if no GPU task
+     *          is assigned to this rank.
+     */
+    DeviceInformation* initDevice(int* deviceId) const;
     //! Return whether this rank has a PME task running on a GPU
     bool thisRankHasPmeGpuTask() const;
     //! Return whether this rank has any task running on a GPU
     bool thisRankHasAnyGpuTask() const;
 };
 
-//! Function for whether the task of \c mapping has value \c TaskType.
-template<GpuTask TaskType>
-bool hasTaskType(const GpuTaskMapping& mapping)
-{
-    return mapping.task_ == TaskType;
-}
-
 } // namespace gmx
 
 #endif