Disable DLB if GPU direct communication halo exchange is used together with GPU update.

author Gaurav Garg <gaugarg@nvidia.com>

Fri, 27 Aug 2021 07:00:46 +0000 (07:00 +0000)

committer Mark Abraham <mark.j.abraham@gmail.com>

Fri, 27 Aug 2021 07:00:46 +0000 (07:00 +0000)
author Gaurav Garg <gaugarg@nvidia.com>
Fri, 27 Aug 2021 07:00:46 +0000 (07:00 +0000)
committer Mark Abraham <mark.j.abraham@gmail.com>
Fri, 27 Aug 2021 07:00:46 +0000 (07:00 +0000)
diff --git a/src/gromacs/domdec/builder.h b/src/gromacs/domdec/builder.h

index aadd54cc5973990bbd5f284d586bdc9793d1d05f..e5b9a8d92b00f370acc43b964b6da5ac6945ae77 100644 (file)
--- a/src/gromacs/domdec/builder.h
+++ b/src/gromacs/domdec/builder.h
@@ -91,7 +91,8 @@ public:
                                 real                              maxUpdateGroupRadius,
                                 ArrayRef<const RVec>              xGlobal,
                                 bool                              useGpuForNonbonded,
-                               bool                              useGpuForPme);
+                               bool                              useGpuForPme,
+                               bool                              directGpuCommUsedWithGpuUpdate);
      //! Destructor
      ~DomainDecompositionBuilder();
      //! Build the resulting DD manager
diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp

index 3627ea02d6dba4cf196572d380554b653764b384..5f26c408481443a0ec67af0691956ecba4843d33 100644 (file)
--- a/src/gromacs/domdec/domdec.cpp
+++ b/src/gromacs/domdec/domdec.cpp
@@ -1803,18 +1803,20 @@ static DlbState forceDlbOffOrBail(DlbState             cmdlineDlbState,
   * state with other run parameters and settings. As a result, the initial state
   * may be altered or an error may be thrown if incompatibility of options is detected.
   *
- * \param [in] mdlog       Logger.
- * \param [in] dlbOption   Enum value for the DLB option.
- * \param [in] bRecordLoad True if the load balancer is recording load information.
- * \param [in] mdrunOptions  Options for mdrun.
- * \param [in] inputrec    Pointer mdrun to input parameters.
- * \returns                DLB initial/startup state.
+ * \param [in] mdlog                Logger.
+ * \param [in] dlbOption            Enum value for the DLB option.
+ * \param [in] bRecordLoad          True if the load balancer is recording load information.
+ * \param [in] mdrunOptions         Options for mdrun.
+ * \param [in] inputrec             Reference to the mdrun input parameters.
+ * \param [in] directGpuCommUsedWithGpuUpdate     True if direct GPU halo exchange is used together with GPU update.
+ * \returns                         DLB initial/startup state.
   */
  static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
                                           DlbOption                dlbOption,
                                           gmx_bool                 bRecordLoad,
                                           const gmx::MdrunOptions& mdrunOptions,
-                                         const t_inputrec&        inputrec)
+                                         const t_inputrec&        inputrec,
+                                         const bool               directGpuCommUsedWithGpuUpdate)
  {
      DlbState dlbState = DlbState::offCanTurnOn;
  
@@ -1826,6 +1828,15 @@ static DlbState determineInitialDlbState(const gmx::MDLogger&     mdlog,
          default: gmx_incons("Invalid dlbOption enum value");
      }
  
+    // P2P GPU communication combined with GPU update leads to a case in which we enqueue
+    // asynchronous work for multiple timesteps, so DLB needs to be disabled in that case.
+    if (directGpuCommUsedWithGpuUpdate)
+    {
+        std::string reasonStr =
+                "it is not supported with GPU direct communication + GPU update enabled.";
+        return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
+    }
+
      /* Reruns don't support DLB: bail or override auto mode */
      if (mdrunOptions.rerun)
      {
@@ -2775,7 +2786,8 @@ static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
  static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
                                  const DomdecOptions&     options,
                                  const gmx::MdrunOptions& mdrunOptions,
-                                const t_inputrec&        ir)
+                                const t_inputrec&        ir,
+                                const bool               directGpuCommUsedWithGpuUpdate)
  {
      DDSettings ddSettings;
  
@@ -2808,8 +2820,8 @@ static DDSettings getDDSettings(const gmx::MDLogger&     mdlog,
          ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
      }
  
-    ddSettings.initialDlbState =
-            determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, ir);
+    ddSettings.initialDlbState = determineInitialDlbState(
+            mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, ir, directGpuCommUsedWithGpuUpdate);
      GMX_LOG(mdlog.info)
              .appendTextFormatted("Dynamic load balancing: %s",
                                   enumValueToString(ddSettings.initialDlbState));
@@ -2844,7 +2856,8 @@ public:
           real                              maxUpdateGroupRadius,
           ArrayRef<const RVec>              xGlobal,
           bool                              useGpuForNonbonded,
-         bool                              useGpuForPme);
+         bool                              useGpuForPme,
+         bool                              directGpuCommUsedWithGpuUpdate);
  
      //! Build the resulting DD manager
      gmx_domdec_t* build(LocalAtomSetManager* atomSets);
@@ -2899,12 +2912,13 @@ DomainDecompositionBuilder::Impl::Impl(const MDLogger&                   mdlog,
                                         const real                        maxUpdateGroupRadius,
                                         ArrayRef<const RVec>              xGlobal,
                                         bool                              useGpuForNonbonded,
-                                       bool                              useGpuForPme) :
+                                       bool                              useGpuForPme,
+                                       bool directGpuCommUsedWithGpuUpdate) :
      mdlog_(mdlog), cr_(cr), options_(options), mtop_(mtop), ir_(ir), notifiers_(notifiers)
  {
      GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator);
  
-    ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
+    ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_, directGpuCommUsedWithGpuUpdate);
  
      if (ddSettings_.eFlop > 1)
      {
@@ -3038,7 +3052,8 @@ DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&
                                                         const real           maxUpdateGroupRadius,
                                                         ArrayRef<const RVec> xGlobal,
                                                         const bool           useGpuForNonbonded,
-                                                       const bool           useGpuForPme) :
+                                                       const bool           useGpuForPme,
+                                                       const bool directGpuCommUsedWithGpuUpdate) :
      impl_(new Impl(mdlog,
                     cr,
                     options,
@@ -3052,7 +3067,8 @@ DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger&
                     maxUpdateGroupRadius,
                     xGlobal,
                     useGpuForNonbonded,
-                   useGpuForPme))
+                   useGpuForPme,
+                   directGpuCommUsedWithGpuUpdate))
  {
  }
  
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index 64ada7d8d8a681ca4794da9735a091ce03a2b9c6..98cd178a96ed670ae7163fa4ab18e9639d0aea66 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -1298,6 +1298,45 @@ int Mdrunner::mdrunner()
                                                   systemHasConstraintsOrVsites(mtop),
                                                   cutoffMargin);
  
+    try
+    {
+        const bool haveFrozenAtoms = inputrecFrozenAtoms(inputrec.get());
+
+        useGpuForUpdate = decideWhetherToUseGpuForUpdate(useDomainDecomposition,
+                                                         updateGroups.useUpdateGroups(),
+                                                         pmeRunMode,
+                                                         domdecOptions.numPmeRanks > 0,
+                                                         useGpuForNonbonded,
+                                                         updateTarget,
+                                                         gpusWereDetected,
+                                                         *inputrec,
+                                                         mtop,
+                                                         doEssentialDynamics,
+                                                         gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
+                                                         haveFrozenAtoms,
+                                                         doRerun,
+                                                         devFlags,
+                                                         mdlog);
+    }
+    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
+
+    bool useGpuDirectHalo = false;
+
+    if (useGpuForNonbonded)
+    {
+        // cr->npmenodes is not yet initialized.
+        // domdecOptions.numPmeRanks == -1 results in 0 separate PME ranks when useGpuForNonbonded is true.
+        // TODO: remove this assumption once auto mode supports separate PME ranks.
+        const int numPmeRanks = domdecOptions.numPmeRanks > 0 ? domdecOptions.numPmeRanks : 0;
+        bool      havePPDomainDecomposition = (cr->sizeOfDefaultCommunicator - numPmeRanks) > 1;
+        useGpuDirectHalo                    = decideWhetherToUseGpuForHalo(devFlags,
+                                                        havePPDomainDecomposition,
+                                                        useGpuForNonbonded,
+                                                        useModularSimulator,
+                                                        doRerun,
+                                                        EI_ENERGY_MINIMIZATION(inputrec->eI));
+    }
+
      // This builder is necessary while we have multi-part construction
      // of DD. Before DD is constructed, we use the existence of
      // the builder object to indicate that further construction of DD
@@ -1305,7 +1344,10 @@ int Mdrunner::mdrunner()
      std::unique_ptr<DomainDecompositionBuilder> ddBuilder;
      if (useDomainDecomposition)
      {
-        ddBuilder = std::make_unique<DomainDecompositionBuilder>(
+        // P2P GPU communication combined with GPU update leads to a case in which we enqueue
+        // asynchronous work for multiple timesteps, so DLB needs to be disabled in that case.
+        const bool directGpuCommUsedWithGpuUpdate = GMX_THREAD_MPI && useGpuDirectHalo && useGpuForUpdate;
+        ddBuilder                                 = std::make_unique<DomainDecompositionBuilder>(
                  mdlog,
                  cr,
                  domdecOptions,
@@ -1319,7 +1361,8 @@ int Mdrunner::mdrunner()
                  updateGroups.maxUpdateGroupRadius(),
                  positionsFromStatePointer(globalState.get()),
                  useGpuForNonbonded,
-                useGpuForPme);
+                useGpuForPme,
+                directGpuCommUsedWithGpuUpdate);
      }
      else
      {
@@ -1403,32 +1446,6 @@ int Mdrunner::mdrunner()
          }
      }
  
-    // The GPU update is decided here because we need to know whether the constraints or
-    // SETTLEs can span across the domain borders (i.e. whether or not update groups are
-    // defined). This is only known after DD is initialized, hence decision on using GPU
-    // update is done so late.
-    try
-    {
-        const bool haveFrozenAtoms = inputrecFrozenAtoms(inputrec.get());
-
-        useGpuForUpdate = decideWhetherToUseGpuForUpdate(useDomainDecomposition,
-                                                         updateGroups.useUpdateGroups(),
-                                                         pmeRunMode,
-                                                         domdecOptions.numPmeRanks > 0,
-                                                         useGpuForNonbonded,
-                                                         updateTarget,
-                                                         gpusWereDetected,
-                                                         *inputrec,
-                                                         mtop,
-                                                         doEssentialDynamics,
-                                                         gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
-                                                         haveFrozenAtoms,
-                                                         doRerun,
-                                                         devFlags,
-                                                         mdlog);
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-
      const bool printHostName = (cr->nnodes > 1);
      gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
  
@@ -1445,13 +1462,6 @@ int Mdrunner::mdrunner()
  
      MdrunScheduleWorkload runScheduleWork;
  
-    bool useGpuDirectHalo = decideWhetherToUseGpuForHalo(devFlags,
-                                                         havePPDomainDecomposition(cr),
-                                                         useGpuForNonbonded,
-                                                         useModularSimulator,
-                                                         doRerun,
-                                                         EI_ENERGY_MINIMIZATION(inputrec->eI));
-
      // Also populates the simulation constant workload description.
      // Note: currently the default duty is DUTY_PP | DUTY_PME for all simulations, including those without PME,
      // so this boolean is sufficient on all ranks to determine whether separate PME ranks are used,
author	Gaurav Garg <gaugarg@nvidia.com>
	Fri, 27 Aug 2021 07:00:46 +0000 (07:00 +0000)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Fri, 27 Aug 2021 07:00:46 +0000 (07:00 +0000)
src/gromacs/domdec/builder.h		patch \| blob \| history
src/gromacs/domdec/domdec.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history