Move GPU task assignment

author Mark Abraham <mark.j.abraham@gmail.com>

Mon, 30 Sep 2019 18:06:20 +0000 (20:06 +0200)

committer Magnus Lundborg <magnus.lundborg@scilifelab.se>

Tue, 1 Oct 2019 15:44:05 +0000 (17:44 +0200)
author Mark Abraham <mark.j.abraham@gmail.com>
Mon, 30 Sep 2019 18:06:20 +0000 (20:06 +0200)
committer Magnus Lundborg <magnus.lundborg@scilifelab.se>
Tue, 1 Oct 2019 15:44:05 +0000 (17:44 +0200)
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index 00f8b7f79fae2f36c58639e107416e0b50d82799..1793b91d542cdc75b7ad0fc5dc5b27bd1f28916a 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -1023,20 +1023,19 @@ int Mdrunner::mdrunner()
      prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
                            useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
  
-    const bool          prefer1DAnd1PulseDD = (c_enableGpuHaloExchange && useGpuForNonbonded);
-    LocalAtomSetManager atomSets;
+    const bool prefer1DAnd1PulseDD = (c_enableGpuHaloExchange && useGpuForNonbonded);
+    // This builder is necessary while we have multi-part construction
+    // of DD. Before DD is constructed, we use the existence of
+    // the builder object to indicate that further construction of DD
+    // is needed.
+    std::unique_ptr<DomainDecompositionBuilder> ddBuilder;
      if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
                       inputrec->eI == eiNM))
      {
-        DomainDecompositionBuilder ddBuilder(mdlog, cr, domdecOptions, mdrunOptions,
-                                             prefer1DAnd1PulseDD,
-                                             mtop, *inputrec,
-                                             box, positionsFromStatePointer(globalState.get()));
-        // TODO use cr->duty in task and GPU assignment so that
-        // ddBuilder can receive the GPU streams to use in buffer
-        // transfers (e.g. halo exchange)
-        cr->dd = ddBuilder.build(&atomSets);
-        // Note that local state still does not exist yet.
+        ddBuilder = std::make_unique<DomainDecompositionBuilder>
+                (mdlog, cr, domdecOptions, mdrunOptions,
+                prefer1DAnd1PulseDD, mtop, *inputrec,
+                box, positionsFromStatePointer(globalState.get()));
      }
      else
      {
@@ -1051,6 +1050,60 @@ int Mdrunner::mdrunner()
          }
      }
  
+    // Produce the task assignment for this rank.
+    GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
+    GpuTaskAssignments        gpuTaskAssignments =
+        gpuTaskAssignmentsBuilder.build(gpuIdsToUse,
+                                        userGpuTaskAssignment,
+                                        *hwinfo,
+                                        cr,
+                                        ms,
+                                        physicalNodeComm,
+                                        nonbondedTarget,
+                                        pmeTarget,
+                                        bondedTarget,
+                                        updateTarget,
+                                        useGpuForNonbonded,
+                                        useGpuForPme,
+                                        thisRankHasDuty(cr, DUTY_PP),
+                                        // TODO cr->duty & DUTY_PME should imply that a PME
+                                        // algorithm is active, but currently does not.
+                                        EEL_PME(inputrec->coulombtype) &&
+                                        thisRankHasDuty(cr, DUTY_PME));
+
+    const bool printHostName = (cr->nnodes > 1);
+    gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode);
+
+    // If the user chose a task assignment, give them some hints
+    // where appropriate.
+    if (!userGpuTaskAssignment.empty())
+    {
+        gpuTaskAssignments.logPerformanceHints(mdlog,
+                                               ssize(gpuIdsToUse));
+    }
+
+    // Get the device handles for the modules, nullptr when no task is assigned.
+    gmx_device_info_t *nonbondedDeviceInfo   = gpuTaskAssignments.initNonbondedDevice(cr);
+    gmx_device_info_t *pmeDeviceInfo         = gpuTaskAssignments.initPmeDevice();
+
+    // TODO Initialize GPU streams here.
+
+    // TODO Currently this is always built, yet DD partition code
+    // checks if it is built before using it. Probably it should
+    // become an MDModule that is made only when another module
+    // requires it (e.g. pull, CompEl, density fitting), so that we
+    // don't update the local atom sets unilaterally every step.
+    LocalAtomSetManager atomSets;
+    if (ddBuilder)
+    {
+        // TODO Pass the GPU streams to ddBuilder to use in buffer
+        // transfers (e.g. halo exchange)
+        cr->dd = ddBuilder->build(&atomSets);
+        // The builder's job is done, so destruct it
+        ddBuilder.reset(nullptr);
+        // Note that local state still does not exist yet.
+    }
+
      if (PAR(cr))
      {
          /* After possible communicator splitting in make_dd_communicators.
@@ -1114,49 +1167,6 @@ int Mdrunner::mdrunner()
          gmx_feenableexcept();
      }
  
-    // TODO This could move before init_domain_decomposition() as part
-    // of refactoring that separates the responsibility for duty
-    // assignment from setup for communication between tasks, and
-    // setup for tasks handled with a domain (ie including short-ranged
-    // tasks, bonded tasks, etc.).
-    //
-    // Note that in general useGpuForNonbonded, etc. can have a value
-    // that is inconsistent with the presence of actual GPUs on any
-    // rank, and that is not known to be a problem until the
-    // duty of the ranks on a node become known.
-
-    // Produce the task assignment for this rank.
-    GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
-    GpuTaskAssignments        gpuTaskAssignments =
-        gpuTaskAssignmentsBuilder.build(gpuIdsToUse,
-                                        userGpuTaskAssignment,
-                                        *hwinfo,
-                                        cr,
-                                        ms,
-                                        physicalNodeComm,
-                                        nonbondedTarget,
-                                        pmeTarget,
-                                        bondedTarget,
-                                        updateTarget,
-                                        useGpuForNonbonded,
-                                        useGpuForPme,
-                                        thisRankHasDuty(cr, DUTY_PP),
-                                        // TODO cr->duty & DUTY_PME should imply that a PME
-                                        // algorithm is active, but currently does not.
-                                        EEL_PME(inputrec->coulombtype) &&
-                                        thisRankHasDuty(cr, DUTY_PME));
-
-    const bool printHostName = (cr->nnodes > 1);
-    gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode);
-
-    // If the user chose a task assignment, give them some hints
-    // where appropriate.
-    if (!userGpuTaskAssignment.empty())
-    {
-        gpuTaskAssignments.logPerformanceHints(mdlog,
-                                               ssize(gpuIdsToUse));
-    }
-
      /* Now that we know the setup is consistent, check for efficiency */
      check_resource_division_efficiency(hwinfo,
                                         gpuTaskAssignments.thisRankHasAnyGpuTask(),
@@ -1164,11 +1174,6 @@ int Mdrunner::mdrunner()
                                         cr,
                                         mdlog);
  
-    // Get the device handles for the modules, nullptr when no task is assigned.
-    gmx_device_info_t *nonbondedDeviceInfo   = gpuTaskAssignments.initNonbondedDevice(cr);
-    gmx_device_info_t *pmeDeviceInfo         = gpuTaskAssignments.initPmeDevice();
-    const bool         thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
-
      /* getting number of PP/PME threads on this MPI / tMPI rank.
         PME: env variable should be read only on one node to make sure it is
         identical everywhere;
@@ -1229,6 +1234,7 @@ int Mdrunner::mdrunner()
                                   .checkpointOptions.period);
      }
  
+    const bool                   thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
      std::unique_ptr<MDAtoms>     mdAtoms;
      std::unique_ptr<gmx_vsite_t> vsite;
  
diff --git a/src/gromacs/taskassignment/taskassignment.h b/src/gromacs/taskassignment/taskassignment.h

index 28400bdccfcf6262f5d385d9f1e15c5e11b20aeb..689035806e8d454d6ff9f7e9201268d1252ca236 100644 (file)
--- a/src/gromacs/taskassignment/taskassignment.h
+++ b/src/gromacs/taskassignment/taskassignment.h
@@ -135,7 +135,8 @@ class GpuTaskAssignmentsBuilder
           *   - the possible existence of multi-simulations
           *
           * to assign the GPUs on each physical node to the tasks on
-         * the ranks of that node.
+         * the ranks of that node. It throws InconsistentInputError
+         * when no useful GPU task assignment is possible.
           *
           * \param[in]  gpuIdsToUse            The compatible GPUs that the user permitted us to use.
           * \param[in]  userGpuTaskAssignment  The user-specified assignment of GPU tasks to device IDs.
author	Mark Abraham <mark.j.abraham@gmail.com>
	Mon, 30 Sep 2019 18:06:20 +0000 (20:06 +0200)
committer	Magnus Lundborg <magnus.lundborg@scilifelab.se>
	Tue, 1 Oct 2019 15:44:05 +0000 (17:44 +0200)
src/gromacs/mdrun/runner.cpp		patch \| blob \| history
src/gromacs/taskassignment/taskassignment.h		patch \| blob \| history