Make PME OpenCL enabled only for AMD devices

author Mark Abraham <mark.j.abraham@gmail.com>

Mon, 5 Nov 2018 17:53:03 +0000 (18:53 +0100)

committer Mark Abraham <mark.j.abraham@gmail.com>

Mon, 5 Nov 2018 23:46:54 +0000 (00:46 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Mon, 5 Nov 2018 17:53:03 +0000 (18:53 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Mon, 5 Nov 2018 23:46:54 +0000 (00:46 +0100)
diff --git a/docs/release-notes/2019/major/highlights.rst b/docs/release-notes/2019/major/highlights.rst

index 38746ec0c970e342657f96aad4e485088b7d1a58..4007329bfae79243bb01ae68f8e6c7f2916f71d5 100644 (file)
--- a/docs/release-notes/2019/major/highlights.rst
+++ b/docs/release-notes/2019/major/highlights.rst
@@ -16,7 +16,7 @@ simulations and hardware. They are:
    include both constraints and virtual sites. This improves performance
    by eliminating overheads during the update, at no cost.
  * Intel integrated GPUs are now supported with OpenCL.
-* PME long-ranged interactions can now also run on a single GPU using
-  OpenCL, which  means many fewer CPU cores are needed for good
+* PME long-ranged interactions can now also run on a single AMD GPU
+  using OpenCL, which means many fewer CPU cores are needed for good
    performance with such hardware.
  * TODO Other stuff
diff --git a/docs/user-guide/mdrun-performance.rst b/docs/user-guide/mdrun-performance.rst

index c995e4ec36d1c494f77f3a30a3cb89c6fa7218be..c15b3e3260f7470cd1b4c06d4dfe2bf1059a2430 100644 (file)
--- a/docs/user-guide/mdrun-performance.rst
+++ b/docs/user-guide/mdrun-performance.rst
@@ -1049,6 +1049,8 @@ Limitations in the current OpenCL support of interest to |Gromacs| users:
  - On NVIDIA GPUs the OpenCL kernels achieve much lower performance
    than the equivalent CUDA kernels due to limitations of the NVIDIA OpenCL
    compiler.
+- PME is currently only supported on AMD devices, because of known
+  issues with devices from other vendors.
  
  Limitations of interest to |Gromacs| developers:
  
diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp

index 717eadbfad9453d3e300af6466410df170772e1c..b10c31d3e8e86d8f5b2c3b3743f763790492ce9f 100644 (file)
--- a/src/gromacs/ewald/pme.cpp
+++ b/src/gromacs/ewald/pme.cpp
@@ -88,6 +88,7 @@
  #include "gromacs/fileio/pdbio.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/hardware/hw_info.h"
  #include "gromacs/math/gmxcomplex.h"
  #include "gromacs/math/invertmatrix.h"
  #include "gromacs/math/units.h"
@@ -103,6 +104,7 @@
  #include "gromacs/timing/walltime_accounting.h"
  #include "gromacs/topology/topology.h"
  #include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxmpi.h"
@@ -141,7 +143,8 @@ addMessageIfNotSupported(const std::list<std::string> &errorReasons,
      return foundErrorReasons;
  }
  
-bool pme_gpu_supports_build(std::string *error)
+bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
+                            std::string         *error)
  {
      std::list<std::string> errorReasons;
      if (GMX_DOUBLE)
@@ -152,6 +155,13 @@ bool pme_gpu_supports_build(std::string *error)
      {
          errorReasons.emplace_back("non-GPU build of GROMACS");
      }
+    if (GMX_GPU == GMX_GPU_OPENCL)
+    {
+        if (!areAllGpuDevicesFromAmd(hwinfo.gpu_info))
+        {
+            errorReasons.emplace_back("only AMD devices are supported");
+        }
+    }
      return addMessageIfNotSupported(errorReasons, error);
  }
  
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h

index ccf5e7227d929cd34b2f63733eeb4dd827abfbc9..1e47dfc5afd677b27d03f11780c84cb8668b9b24 100644 (file)
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -57,6 +57,7 @@
  #include "gromacs/utility/basedefinitions.h"
  #include "gromacs/utility/real.h"
  
+struct gmx_hw_info_t;
  struct interaction_const_t;
  struct t_commrec;
  struct t_inputrec;
@@ -250,11 +251,13 @@ void gmx_pme_reinit_atoms(const gmx_pme_t *pme, int nAtoms, const real *charges)
   * pme_gpu_check_restrictions(), except that works with a
   * formed gmx_pme_t structure. Should that one go away/work with inputrec?
   *
- * \param[out] error  If non-null, the error message when PME is not supported on GPU.
+ * \param[in]  hwinfo  Information about the detected hardware
+ * \param[out] error   If non-null, the error message when PME is not supported on GPU.
   *
   * \returns true if PME can run on GPU on this build, false otherwise.
   */
-bool pme_gpu_supports_build(std::string *error);
+bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
+                            std::string         *error);
  
  /*! \brief Checks whether the input system allows to run PME on GPU.
   * TODO: this partly duplicates an internal PME assert function
diff --git a/src/gromacs/ewald/tests/pmegathertest.cpp b/src/gromacs/ewald/tests/pmegathertest.cpp

index 3cd1ede5b90778e68b0e245ac37bd0129443a8f1..41d7fdc0c12f9559023c222ff1c1f0cb7ab15518 100644 (file)
--- a/src/gromacs/ewald/tests/pmegathertest.cpp
+++ b/src/gromacs/ewald/tests/pmegathertest.cpp
@@ -389,7 +389,7 @@ class PmeGatherTest : public ::testing::TestWithParam<GatherInputParameters>
              for (const auto &context : getPmeTestEnv()->getHardwareContexts())
              {
                  CodePath   codePath       = context->getCodePath();
-                const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+                const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
                  if (!supportedInput)
                  {
                      /* Testing the failure for the unsupported input */
diff --git a/src/gromacs/ewald/tests/pmesolvetest.cpp b/src/gromacs/ewald/tests/pmesolvetest.cpp

index b8355ac67d1102954e4e1966d6f0ad9b1ae5b5e4..fed065a220ed9078bc52a4f7cd490cfb19bd26e6 100644 (file)
--- a/src/gromacs/ewald/tests/pmesolvetest.cpp
+++ b/src/gromacs/ewald/tests/pmesolvetest.cpp
@@ -112,7 +112,7 @@ class PmeSolveTest : public ::testing::TestWithParam<SolveInputParameters>
              for (const auto &context : getPmeTestEnv()->getHardwareContexts())
              {
                  CodePath   codePath       = context->getCodePath();
-                const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+                const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
                  if (!supportedInput)
                  {
                      /* Testing the failure for the unsupported input */
diff --git a/src/gromacs/ewald/tests/pmesplinespreadtest.cpp b/src/gromacs/ewald/tests/pmesplinespreadtest.cpp

index 3e593791bf8a16cfdf9f2e100da019d0141bd6e3..d8c5beb645961731541bc6c1c2c79f35f1e19375 100644 (file)
--- a/src/gromacs/ewald/tests/pmesplinespreadtest.cpp
+++ b/src/gromacs/ewald/tests/pmesplinespreadtest.cpp
@@ -123,7 +123,7 @@ class PmeSplineAndSpreadTest : public ::testing::TestWithParam<SplineAndSpreadIn
              for (const auto &context : getPmeTestEnv()->getHardwareContexts())
              {
                  CodePath   codePath       = context->getCodePath();
-                const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+                const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
                  if (!supportedInput)
                  {
                      /* Testing the failure for the unsupported input */
diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp

index fe0a4fabf18f1726e5d19da9e8ddc82177813610..3f600572fe0b7d5f3f6bb38e51053347b58c4abf 100644 (file)
--- a/src/gromacs/ewald/tests/pmetestcommon.cpp
+++ b/src/gromacs/ewald/tests/pmetestcommon.cpp
@@ -70,7 +70,9 @@ namespace gmx
  namespace test
  {
  
-bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode)
+bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
+                             const t_inputrec    *inputRec,
+                             CodePath             mode)
  {
      bool       implemented;
      gmx_mtop_t mtop;
@@ -81,7 +83,7 @@ bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode)
              break;
  
          case CodePath::GPU:
-            implemented = (pme_gpu_supports_build(nullptr) &&
+            implemented = (pme_gpu_supports_build(hwinfo, nullptr) &&
                             pme_gpu_supports_input(*inputRec, mtop, nullptr));
              break;
  
diff --git a/src/gromacs/ewald/tests/pmetestcommon.h b/src/gromacs/ewald/tests/pmetestcommon.h

index d3e9696b2cbc62b99add86eb0a8757bac225d0fe..fc714696dd4b2d11a0ec22872f8e7fab05c79fc9 100644 (file)
--- a/src/gromacs/ewald/tests/pmetestcommon.h
+++ b/src/gromacs/ewald/tests/pmetestcommon.h
@@ -106,7 +106,9 @@ typedef std::tuple<real, Matrix3x3> PmeSolveOutput;
  // Misc.
  
  //! Tells if this generally valid PME input is supported for this mode
-bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode);
+bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
+                             const t_inputrec    *inputRec,
+                             CodePath             mode);
  
  //! Spline moduli are computed in double precision, so they're very good in single precision
  constexpr int64_t c_splineModuliSinglePrecisionUlps = 1;
diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.cpp b/src/gromacs/ewald/tests/testhardwarecontexts.cpp

index 3e9b19add6e60a4584c7e378710b915c2a7234ab..b4486fc7caa3d467153ce6f1ea6a170b3d8f7771 100644 (file)
--- a/src/gromacs/ewald/tests/testhardwarecontexts.cpp
+++ b/src/gromacs/ewald/tests/testhardwarecontexts.cpp
@@ -47,6 +47,7 @@
  #include "gromacs/compat/make_unique.h"
  #include "gromacs/ewald/pme.h"
  #include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/detecthardware.h"
  #include "gromacs/hardware/hw_info.h"
  #include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/exceptions.h"
@@ -111,7 +112,7 @@ void PmeTestEnvironment::SetUp()
      hardwareContexts_.emplace_back(compat::make_unique<TestHardwareContext>(CodePath::CPU, "", nullptr));
  
      hardwareInfo_ = hardwareInit();
-    if (!pme_gpu_supports_build(nullptr))
+    if (!pme_gpu_supports_build(*hardwareInfo_, nullptr))
      {
          // PME can only run on the CPU, so don't make any more test contexts.
          return;
diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.h b/src/gromacs/ewald/tests/testhardwarecontexts.h

index 4d39c755c9e7a3c8ee0322b8b7da96b79a174202..f364c466d4fb3f93e6ad921e6483f984e05b99e5 100644 (file)
--- a/src/gromacs/ewald/tests/testhardwarecontexts.h
+++ b/src/gromacs/ewald/tests/testhardwarecontexts.h
@@ -49,9 +49,10 @@
  #include <gtest/gtest.h>
  
  #include "gromacs/ewald/pme-gpu-program.h"
-#include "gromacs/hardware/detecthardware.h"
  #include "gromacs/hardware/gpu_hw_info.h"
  
+struct gmx_hw_info_t;
+
  namespace gmx
  {
  namespace test
@@ -118,6 +119,8 @@ class PmeTestEnvironment : public ::testing::Environment
          void TearDown() override;
          //! Get available hardware contexts.
          const TestHardwareContexts &getHardwareContexts() const {return hardwareContexts_; }
+        //! Get available hardware information.
+        const gmx_hw_info_t *hwinfo() const { return hardwareInfo_; }
  };
  
  //! Get the test environment
diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h

index ace93a5e3a6f01e4a9d986caf414777e62318a81..69e8ee410cb2ca205c01049f021f68b00410a10f 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils.h
+++ b/src/gromacs/gpu_utils/gpu_utils.h
@@ -203,6 +203,21 @@ void get_gpu_device_info_string(char *GPU_FUNC_ARGUMENT(s),
                                  const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
                                  int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM
  
+/*! \brief Returns whether all compatible OpenCL devices are from AMD.
+ *
+ * This is currently the most useful and best tested platform for
+ * supported OpenCL devices, so some modules may need to check what
+ * degree of support they should offer.
+ *
+ * \todo An enumeration visible in the hardware module would make such
+ * checks more configurable, if we discover other needs in future.
+ *
+ * \returns whether all detected compatible devices have AMD for the vendor.
+ */
+OPENCL_FUNC_QUALIFIER
+bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &OPENCL_FUNC_ARGUMENT(gpuInfo))
+OPENCL_FUNC_TERM_WITH_RETURN(false)
+
  /*! \brief Returns the size of the gpu_dev_info struct.
   *
   * The size of gpu_dev_info can be used for allocation and communication.
diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp

index 6f5f6315233ac3ef1251973622e570f1a258a82f..a9fea0d2840cef5c4b59761d34e7d2007c55e087 100644 (file)
--- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
@@ -384,6 +384,21 @@ void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int ind
      }
  }
  
+bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &gpuInfo)
+{
+    bool result = true; // stays true when no compatible device is present (vacuous truth)
+    for (int i = 0; i < gpuInfo.n_dev; ++i)
+    {
+        if ((gpuInfo.gpu_dev[i].stat == egpuCompatible) && // devices that are not compatible are ignored
+            (gpuInfo.gpu_dev[i].vendor_e != OCL_VENDOR_AMD))
+        {
+            result = false; // a single compatible non-AMD device decides the answer
+            break;
+        }
+    }
+    return result;
+}
+
  //! This function is documented in the header file
  void init_gpu(const gmx_device_info_t *deviceInfo)
  {
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index 6377d0e792ad2a2fa83bfdc44498bf9f598a670d..6b5398b449f94a008b40739cb9d81012838f6f7a 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -619,7 +619,7 @@ int Mdrunner::mdrunner()
                      inputrec->cutoff_scheme == ecutsVERLET,
                      gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
                      hw_opt.nthreads_tmpi);
-            auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
+            auto canUseGpuForPme   = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
              useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
                      (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
                      canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
@@ -687,7 +687,7 @@ int Mdrunner::mdrunner()
                                                                  emulateGpuNonbonded, usingVerletScheme,
                                                                  gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
                                                                  gpusWereDetected);
-        auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
+        auto canUseGpuForPme   = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
          useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
                                                      canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
                                                      gpusWereDetected);
diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp

index ae9c22026d5c182f3360e9b1a8e981f2139568ab..5fa35b34eb1ede13742df5a94d322535ed7153b6 100644 (file)
--- a/src/gromacs/taskassignment/resourcedivision.cpp
+++ b/src/gromacs/taskassignment/resourcedivision.cpp
@@ -356,7 +356,7 @@ int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
      if (pmeOnGpu)
      {
          GMX_RELEASE_ASSERT((EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) &&
-                           pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr),
+                           pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr),
                             "PME can't be on GPUs unless we are using PME");
  
          // PME on GPUs supports a single PME rank with PP running on the same or few other ranks.
diff --git a/src/programs/mdrun/tests/pmetest.cpp b/src/programs/mdrun/tests/pmetest.cpp

index 9462633d92ef67f1a204de63dc36ae3c902a4f15..c2c734c53d049c2f4be81f49cb83dca96739eeab 100644 (file)
--- a/src/programs/mdrun/tests/pmetest.cpp
+++ b/src/programs/mdrun/tests/pmetest.cpp
@@ -54,11 +54,15 @@
  
  #include <gtest/gtest-spi.h>
  
+#include "gromacs/ewald/pme.h"
  #include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/detecthardware.h"
  #include "gromacs/hardware/gpu_hw_info.h"
  #include "gromacs/trajectory/energyframe.h"
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/loggerbuilder.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
  #include "gromacs/utility/stringutil.h"
  
  #include "testutils/mpitest.h"
@@ -127,6 +131,11 @@ void PmeTest::runTest(const RunModesList &runModes)
      {
          EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks
      }
+
+    auto hardwareInfo_ = gmx_detect_hardware(MDLogger {},
+                                             PhysicalNodeCommunicator(MPI_COMM_WORLD,
+                                                                      gmx_physicalnode_id_hash()));
+
      for (const auto &mode : runModes)
      {
          auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos);
@@ -137,6 +146,14 @@ void PmeTest::runTest(const RunModesList &runModes)
              // to test here.
              continue;
          }
+        auto modeTargetsPmeOnGpus = (mode.first.find("PmeOnGpu") != std::string::npos);
+        if (modeTargetsPmeOnGpus && !pme_gpu_supports_build(*hardwareInfo_, nullptr))
+        {
+            // This run mode will cause a fatal error from mdrun when
+            // it finds an unsuitable device, which is not something
+            // we're trying to test here.
+            continue;
+        }
  
          runner_.edrFileName_ = fileManager_.getTemporaryFilePath(inputFile + "_" + mode.first + ".edr");
author	Mark Abraham <mark.j.abraham@gmail.com>
	Mon, 5 Nov 2018 17:53:03 +0000 (18:53 +0100)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Mon, 5 Nov 2018 23:46:54 +0000 (00:46 +0100)
docs/release-notes/2019/major/highlights.rst		patch \| blob \| history
docs/user-guide/mdrun-performance.rst		patch \| blob \| history
src/gromacs/ewald/pme.cpp		patch \| blob \| history
src/gromacs/ewald/pme.h		patch \| blob \| history
src/gromacs/ewald/tests/pmegathertest.cpp		patch \| blob \| history
src/gromacs/ewald/tests/pmesolvetest.cpp		patch \| blob \| history
src/gromacs/ewald/tests/pmesplinespreadtest.cpp		patch \| blob \| history
src/gromacs/ewald/tests/pmetestcommon.cpp		patch \| blob \| history
src/gromacs/ewald/tests/pmetestcommon.h		patch \| blob \| history
src/gromacs/ewald/tests/testhardwarecontexts.cpp		patch \| blob \| history
src/gromacs/ewald/tests/testhardwarecontexts.h		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils.h		patch \| blob \| history
src/gromacs/gpu_utils/gpu_utils_ocl.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history
src/gromacs/taskassignment/resourcedivision.cpp		patch \| blob \| history
src/programs/mdrun/tests/pmetest.cpp		patch \| blob \| history