Enable separate PME GPU rank

author Aleksei Iupinov <a.yupinov@gmail.com>

Thu, 16 Nov 2017 17:07:05 +0000 (18:07 +0100)

committer Mark Abraham <mark.j.abraham@gmail.com>

Fri, 24 Nov 2017 20:15:16 +0000 (21:15 +0100)
author Aleksei Iupinov <a.yupinov@gmail.com>
Thu, 16 Nov 2017 17:07:05 +0000 (18:07 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Fri, 24 Nov 2017 20:15:16 +0000 (21:15 +0100)
diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp

index d20ca674d6de3be6bbd11d42bf1f5079d9fb45fe..6002bb913bb0639fabe885db7fd735b29aec4f11 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.cpp
+++ b/src/gromacs/taskassignment/decidegpuusage.cpp
@@ -152,7 +152,8 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbo
                                            const std::vector<int> &gpuIdsToUse,
                                            const std::vector<int> &userGpuTaskAssignment,
                                            const bool              canUseGpuForPme,
-                                          const int               numRanksPerSimulation)
+                                          const int               numRanksPerSimulation,
+                                          const int               numPmeRanksPerSimulation)
  {
      // First, exclude all cases where we can't run PME on GPUs.
      if ((pmeTarget == TaskTarget::Cpu) ||
@@ -182,10 +183,11 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbo
          // PME on GPUs is only supported in a single case
          if (pmeTarget == TaskTarget::Gpu)
          {
-            if (numRanksPerSimulation > 1)
+            if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
+                (numPmeRanksPerSimulation > 1))
              {
                  GMX_THROW(InconsistentInputError
-                              ("When you run mdrun -pme gpu -gputasks, you must supply a PME .tpr file and use a single rank."));
+                              ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
              }
              return true;
          }
@@ -200,12 +202,13 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbo
  
      if (pmeTarget == TaskTarget::Gpu)
      {
-        if (numRanksPerSimulation > 1)
+        if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
+            (numPmeRanksPerSimulation > 1))
          {
              GMX_THROW(NotImplementedError
                            ("PME tasks were required to run on GPUs, but that is not implemented with "
-                          "more than one rank. Use a single rank, or permit PME tasks to be assigned "
-                          "to the CPU."));
+                          "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
+                          "or permit PME tasks to be assigned to the CPU."));
          }
          return true;
      }
@@ -316,7 +319,8 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
                                    const TaskTarget        pmeTarget,
                                    const std::vector<int> &userGpuTaskAssignment,
                                    const bool              canUseGpuForPme,
-                                  const int               numRanksPerSimulation)
+                                  const int               numRanksPerSimulation,
+                                  const int               numPmeRanksPerSimulation)
  {
      if (pmeTarget == TaskTarget::Cpu)
      {
@@ -374,12 +378,13 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
  
      if (pmeTarget == TaskTarget::Gpu)
      {
-        if (numRanksPerSimulation > 1)
+        if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
+            (numPmeRanksPerSimulation > 1))
          {
              GMX_THROW(NotImplementedError
                            ("PME tasks were required to run on GPUs, but that is not implemented with "
-                          "more than one rank. Use a single rank, or permit PME tasks to be assigned "
-                          "to the CPU."));
+                          "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
+                          "or permit PME tasks to be assigned to the CPU."));
          }
          return true;
      }
diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h

index a1738429b4c8b3119482e3be9c7f7621bd80b6fd..438e00b8a6e48a23e6d95ade1a3ad95ed37db12c 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.h
+++ b/src/gromacs/taskassignment/decidegpuusage.h
@@ -102,6 +102,7 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          n
   * \param[in]  userGpuTaskAssignment     The user-specified assignment of GPU tasks to device IDs.
   * \param[in]  canUseGpuForPme           Whether the form of PME chosen can run on a GPU
   * \param[in]  numRanksPerSimulation     The number of ranks in each simulation.
+ * \param[in]  numPmeRanksPerSimulation  The number of PME ranks in each simulation.
   *
   * \returns    Whether the simulation will run PME tasks on GPUs.
   *
@@ -112,7 +113,8 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuFor
                                                 const std::vector<int> &gpuIdsToUse,
                                                 const std::vector<int> &userGpuTaskAssignment,
                                                 const bool              canUseGpuForPme,
-                                               const int               numRanksPerSimulation);
+                                               const int               numRanksPerSimulation,
+                                               const int               numPmeRanksPerSimulation);
  
  /*! \brief Decide whether the simulation will try to run nonbonded
   * tasks on GPUs.
@@ -169,6 +171,7 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
   * \param[in]  userGpuTaskAssignment     The user-specified assignment of GPU tasks to device IDs.
   * \param[in]  canUseGpuForPme           Whether the form of PME chosen can run on a GPU
   * \param[in]  numRanksPerSimulation     The number of ranks in each simulation.
+ * \param[in]  numPmeRanksPerSimulation  The number of PME ranks in each simulation.
   *
   * \returns    Whether the simulation will run nonbonded and PME tasks, respectively, on GPUs.
   *
@@ -178,7 +181,8 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
                                    const TaskTarget        pmeTarget,
                                    const std::vector<int> &userGpuTaskAssignment,
                                    const bool              canUseGpuForPme,
-                                  const int               numRanksPerSimulation);
+                                  const int               numRanksPerSimulation,
+                                  const int               numPmeRanksPerSimulation);
  
  }
  
diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp

index aae46bd895fc5e92756932002bf89165379372b4..253e2b54bf8ecc9c6feae9c2c292cf202a9cc448 100644 (file)
--- a/src/gromacs/taskassignment/resourcedivision.cpp
+++ b/src/gromacs/taskassignment/resourcedivision.cpp
@@ -354,15 +354,12 @@ int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
                             pme_gpu_supports_input(inputrec, nullptr),
                             "PME can't be on GPUs unless we are using PME");
  
-        // A single rank is all that is supported with PME on GPUs
+        // PME on GPUs supports a single PME rank with PP running on the same or few other ranks.
+        // For now, let's treat separate PME GPU rank as opt-in.
          if (hw_opt->nthreads_tmpi < 1)
          {
              return 1;
          }
-        if (hw_opt->nthreads_tmpi > 1)
-        {
-            gmx_fatal(FARGS, "PME on GPUs is only supported with a single rank");
-        }
      }
  
      {
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp

index 58059f5c4c51a738e550130adaa9626a3edc8bcb..15631660b01e0e2728f370cd3d7c80bb11e9df2e 100644 (file)
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -600,7 +600,7 @@ int Mdrunner::mdrunner()
              auto canUseGpuForPme   = inputSystemHasPme && pme_gpu_supports_input(inputrec, nullptr);
              useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
                      (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
-                    canUseGpuForPme, hw_opt.nthreads_tmpi);
+                    canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
          /* Determine how many thread-MPI ranks to start.
@@ -652,7 +652,7 @@ int Mdrunner::mdrunner()
                                                                  gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, doRerun));
          auto inputSystemHasPme = EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype);
          auto canUseGpuForPme   = inputSystemHasPme && pme_gpu_supports_input(inputrec, nullptr);
-        useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, canUseGpuForPme, cr->nnodes);
+        useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks);
          pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
          if ((pmeRunMode == PmeRunMode::GPU) && (pmeFftTarget == TaskTarget::Cpu))
          {
@@ -734,21 +734,30 @@ int Mdrunner::mdrunner()
  
      if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
      {
-        /* With GPUs we don't automatically use PME-only ranks. PME ranks can
+        /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
           * improve performance with many threads per GPU, since our OpenMP
           * scaling is bad, but it's difficult to automate the setup.
           */
-        domdecOptions.numPmeRanks = 0;
+        if (useGpuForPme && (hw_opt.nthreads_tmpi > 1))
+        {
+            domdecOptions.numPmeRanks = 1;
+            //TODO print appropriate notice on why asking for multiple threads and -pme gpu causes a separate PME rank to start
+        }
+        else
+        {
+            domdecOptions.numPmeRanks = 0;
+        }
      }
      if (useGpuForPme)
      {
          if (domdecOptions.numPmeRanks < 0)
          {
-            domdecOptions.numPmeRanks = 0; // separate GPU ranks not supported
+            domdecOptions.numPmeRanks = 0;
+            // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
          }
          else
          {
-            GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks == 0, "Separate PME GPU ranks are not yet supported");
+            GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
          }
      }
  
@@ -1045,6 +1054,7 @@ int Mdrunner::mdrunner()
      if (pmeGpuTaskMapping != gpuTaskAssignment.end())
      {
          pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_);
+        init_gpu(mdlog, pmeDeviceInfo);
      }
  
      /* getting number of PP/PME threads
@@ -1212,12 +1222,6 @@ int Mdrunner::mdrunner()
          {
              try
              {
-                if (pmeDeviceInfo != nullptr && pmeDeviceInfo != nonbondedDeviceInfo)
-                {
-                    GMX_THROW(NotImplementedError
-                                  ("PME on a GPU can run only on the same GPU as nonbonded, because "
-                                  "context switching is not yet supported."));
-                }
                  pmedata = gmx_pme_init(cr, npme_major, npme_minor, inputrec,
                                         mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,
                                         mdrunOptions.reproducible,
diff --git a/src/programs/mdrun/tests/CMakeLists.txt b/src/programs/mdrun/tests/CMakeLists.txt

index 675277e65d4e23580813a4ec7e64330dc6b8ae3e..8bf1a7d781c3533a82563a0bb2cc1f298237d7b9 100644 (file)
--- a/src/programs/mdrun/tests/CMakeLists.txt
+++ b/src/programs/mdrun/tests/CMakeLists.txt
@@ -34,9 +34,12 @@
  
  # make an "object library" for code that we re-use for both kinds of tests
  add_library(mdrun_test_objlib OBJECT
+    energyreader.cpp
      mdruncomparisonfixture.cpp
      moduletest.cpp
      terminationhelper.cpp
+    # PME tests
+    pmetest.cpp
      )
  
  set(testname "MdrunTests")
@@ -46,10 +49,8 @@ gmx_add_gtest_executable(
      ${exename}
      # files with code for tests
      tabulated_bonded_interactions.cpp
-    energyreader.cpp
      grompp.cpp
      initialconstraints.cpp
-    pmetest.cpp
      rerun.cpp
      trajectory_writing.cpp
      trajectoryreader.cpp
diff --git a/src/programs/mdrun/tests/pmetest.cpp b/src/programs/mdrun/tests/pmetest.cpp

index b91d71d543ce95310a1099cd2a6550642adad196..e1acdc8dee1c4940e0e01673959b7f3ebd58b422 100644 (file)
--- a/src/programs/mdrun/tests/pmetest.cpp
+++ b/src/programs/mdrun/tests/pmetest.cpp
@@ -34,10 +34,14 @@
   */
  /*! \internal \file
   * \brief
- * This implements basic PME sanity test (using single-rank mdrun).
+ * This implements basic PME sanity tests.
   * It runs the input system with PME for several steps (on CPU and GPU, if available),
   * and checks the reciprocal and conserved energies.
- * TODO: implement multi-rank tests as well.
+ * As part of mdrun-test, this will always run single rank PME simulation.
+ * As part of mdrun-mpi-test, this will run same as above when a single rank is requested,
+ * or a simulation with a single separate PME rank ("-npme 1") when multiple ranks are requested.
+ * \todo Extend and generalize this for more multi-rank tests (-npme 0, -npme 2, etc).
+ * \todo Implement death tests (e.g. for PME GPU decomposition).
   *
   * \author Aleksei Iupinov <a.yupinov@gmail.com>
   * \ingroup module_mdrun_integration_tests
@@ -50,11 +54,15 @@
  #include <string>
  #include <vector>
  
+#include <gtest/gtest-spi.h>
+
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/hardware/gpu_hw_info.h"
  #include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/gmxmpi.h"
  #include "gromacs/utility/stringutil.h"
  
+#include "testutils/mpitest.h"
  #include "testutils/refdata.h"
  
  #include "energyreader.h"
@@ -111,6 +119,11 @@ TEST_F(PmeTest, ReproducesEnergies)
      const std::string inputFile = "spc-and-methanol";
      runner_.useTopGroAndNdxFromDatabase(inputFile.c_str());
  
+    // With single rank we can and will always test PP+PME as part of mdrun-test.
+    // With multiple ranks we can additionally test a single PME-only rank within mdrun-mpi-test.
+    const bool parallelRun    = (getNumberOfTestMpiRanks() > 1);
+    const bool useSeparatePme = parallelRun;
+
      EXPECT_EQ(0, runner_.callGrompp());
  
      //TODO test all proper/improper combinations in more thorough way?
@@ -122,7 +135,11 @@ TEST_F(PmeTest, ReproducesEnergies)
      runModes["PmeOnGpuFftAuto"]  = {"-pme", "gpu", "-pmefft", "auto"};
      TestReferenceData    refData;
      TestReferenceChecker rootChecker(refData.rootChecker());
-
+    const bool           thisRankChecks = (gmx_node_rank() == 0);
+    if (!thisRankChecks)
+    {
+        EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks
+    }
      for (const auto &mode : runModes)
      {
          auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos);
@@ -138,30 +155,53 @@ TEST_F(PmeTest, ReproducesEnergies)
  
          CommandLine commandLine(mode.second);
          commandLine.append("-notunepme"); // for reciprocal energy reproducibility
+        if (useSeparatePme)
+        {
+            commandLine.addOption("-npme", 1);
+        }
          ASSERT_EQ(0, runner_.callMdrun(commandLine));
  
-        auto energyReader      = openEnergyFileToReadFields(runner_.edrFileName_, {"Coul. recip.", "Total Energy", "Kinetic En."});
-        auto conservedChecker  = rootChecker.checkCompound("Energy", "Conserved");
-        auto reciprocalChecker = rootChecker.checkCompound("Energy", "Reciprocal");
-        for (int i = 0; i <= nsteps; i++)
+        if (thisRankChecks)
          {
-            EnergyFrame frame            = energyReader->frame();
-            std::string stepNum          = gmx::formatString("%d", i);
-            const real  conservedEnergy  = frame.at("Total Energy");
-            const real  reciprocalEnergy = frame.at("Coul. recip.");
-            if (i == 0)
+            auto energyReader      = openEnergyFileToReadFields(runner_.edrFileName_, {"Coul. recip.", "Total Energy", "Kinetic En."});
+            auto conservedChecker  = rootChecker.checkCompound("Energy", "Conserved");
+            auto reciprocalChecker = rootChecker.checkCompound("Energy", "Reciprocal");
+            for (int i = 0; i <= nsteps; i++)
              {
-                // use first step values as references for tolerance
-                const real startingKineticEnergy = frame.at("Kinetic En.");
-                const auto conservedTolerance    = relativeToleranceAsFloatingPoint(startingKineticEnergy, 2e-5);
-                const auto reciprocalTolerance   = relativeToleranceAsFloatingPoint(reciprocalEnergy, 2e-5);
-                reciprocalChecker.setDefaultTolerance(reciprocalTolerance);
-                conservedChecker.setDefaultTolerance(conservedTolerance);
+                EnergyFrame frame            = energyReader->frame();
+                std::string stepNum          = gmx::formatString("%d", i);
+                const real  conservedEnergy  = frame.at("Total Energy");
+                const real  reciprocalEnergy = frame.at("Coul. recip.");
+                if (i == 0)
+                {
+                    // use first step values as references for tolerance
+                    const real startingKineticEnergy = frame.at("Kinetic En.");
+                    const auto conservedTolerance    = relativeToleranceAsFloatingPoint(startingKineticEnergy, 2e-5);
+                    const auto reciprocalTolerance   = relativeToleranceAsFloatingPoint(reciprocalEnergy, 3e-5);
+                    reciprocalChecker.setDefaultTolerance(reciprocalTolerance);
+                    conservedChecker.setDefaultTolerance(conservedTolerance);
+                }
+                conservedChecker.checkReal(conservedEnergy, stepNum.c_str());
+                reciprocalChecker.checkReal(reciprocalEnergy, stepNum.c_str());
              }
-            conservedChecker.checkReal(conservedEnergy, stepNum.c_str());
-            reciprocalChecker.checkReal(reciprocalEnergy, stepNum.c_str());
          }
+        // FIXME: without this barrier, one of the mdruns was somehow having a non-PME inputrec (!)
+#if GMX_LIB_MPI
+        if (parallelRun)
+        {
+            MPI_Barrier(MPI_COMM_WORLD);
+        }
+#endif
+    }
+
+    // This is a workaround for the output files to not be deleted in a parallel run.
+    // TODO: consider moving the barrier into the file manager destructor.
+#if GMX_LIB_MPI
+    if (parallelRun)
+    {
+        MPI_Barrier(MPI_COMM_WORLD);
      }
+#endif
  }
  
  }
author	Aleksei Iupinov <a.yupinov@gmail.com>
	Thu, 16 Nov 2017 17:07:05 +0000 (18:07 +0100)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Fri, 24 Nov 2017 20:15:16 +0000 (21:15 +0100)
src/gromacs/taskassignment/decidegpuusage.cpp		patch \| blob \| history
src/gromacs/taskassignment/decidegpuusage.h		patch \| blob \| history
src/gromacs/taskassignment/resourcedivision.cpp		patch \| blob \| history
src/programs/mdrun/runner.cpp		patch \| blob \| history
src/programs/mdrun/tests/CMakeLists.txt		patch \| blob \| history
src/programs/mdrun/tests/pmetest.cpp		patch \| blob \| history