From: Mark Abraham <mark.j.abraham@gmail.com>
Date: Mon, 5 Nov 2018 17:53:03 +0000 (+0100)
Subject: Make PME OpenCL enabled only for AMD devices
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=577b4d2369c8dd8098f96adcc98caae2d3d8ed6a;p=alexxy%2Fgromacs.git

Make PME OpenCL enabled only for AMD devices

PME on OpenCL devices from other vendors has known issues,
and the fixes for those are not yet complete.

Refs #2702, #2719

Change-Id: I0d443229ffe4cee3bb4029f57502f9c7fba2574d
---

diff --git a/docs/release-notes/2019/major/highlights.rst b/docs/release-notes/2019/major/highlights.rst
index 38746ec0c9..4007329bfa 100644
--- a/docs/release-notes/2019/major/highlights.rst
+++ b/docs/release-notes/2019/major/highlights.rst
@@ -16,7 +16,7 @@ simulations and hardware. They are:
   include both constraints and virtual sites. This improves performance
   by eliminating overheads during the update, at no cost.
 * Intel integrated GPUs are now supported with OpenCL.
-* PME long-ranged interactions can now also run on a single GPU using
-  OpenCL, which  means many fewer CPU cores are needed for good
+* PME long-ranged interactions can now also run on a single AMD GPU
+  using OpenCL, which means many fewer CPU cores are needed for good
   performance with such hardware.
 * TODO Other stuff
diff --git a/docs/user-guide/mdrun-performance.rst b/docs/user-guide/mdrun-performance.rst
index c995e4ec36..c15b3e3260 100644
--- a/docs/user-guide/mdrun-performance.rst
+++ b/docs/user-guide/mdrun-performance.rst
@@ -1049,6 +1049,8 @@ Limitations in the current OpenCL support of interest to |Gromacs| users:
 - On NVIDIA GPUs the OpenCL kernels achieve much lower performance
   than the equivalent CUDA kernels due to limitations of the NVIDIA OpenCL
   compiler.
+- PME is currently only supported on AMD devices, because of known
+  issues with devices from other vendors.
 
 Limitations of interest to |Gromacs| developers:
 
diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp
index 717eadbfad..b10c31d3e8 100644
--- a/src/gromacs/ewald/pme.cpp
+++ b/src/gromacs/ewald/pme.cpp
@@ -88,6 +88,7 @@
 #include "gromacs/fileio/pdbio.h"
 #include "gromacs/gmxlib/network.h"
 #include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/hardware/hw_info.h"
 #include "gromacs/math/gmxcomplex.h"
 #include "gromacs/math/invertmatrix.h"
 #include "gromacs/math/units.h"
@@ -103,6 +104,7 @@
 #include "gromacs/timing/walltime_accounting.h"
 #include "gromacs/topology/topology.h"
 #include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxmpi.h"
@@ -141,7 +143,8 @@ addMessageIfNotSupported(const std::list<std::string> &errorReasons,
     return foundErrorReasons;
 }
 
-bool pme_gpu_supports_build(std::string *error)
+bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
+                            std::string         *error)
 {
     std::list<std::string> errorReasons;
     if (GMX_DOUBLE)
@@ -152,6 +155,13 @@ bool pme_gpu_supports_build(std::string *error)
     {
         errorReasons.emplace_back("non-GPU build of GROMACS");
     }
+    if (GMX_GPU == GMX_GPU_OPENCL)
+    {
+        if (!areAllGpuDevicesFromAmd(hwinfo.gpu_info))
+        {
+            errorReasons.emplace_back("only AMD devices are supported");
+        }
+    }
     return addMessageIfNotSupported(errorReasons, error);
 }
 
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h
index ccf5e7227d..1e47dfc5af 100644
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -57,6 +57,7 @@
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/real.h"
 
+struct gmx_hw_info_t;
 struct interaction_const_t;
 struct t_commrec;
 struct t_inputrec;
@@ -250,11 +251,13 @@ void gmx_pme_reinit_atoms(const gmx_pme_t *pme, int nAtoms, const real *charges)
  * pme_gpu_check_restrictions(), except that works with a
  * formed gmx_pme_t structure. Should that one go away/work with inputrec?
  *
- * \param[out] error  If non-null, the error message when PME is not supported on GPU.
+ * \param[in]  hwinfo  Information about the detected hardware
+ * \param[out] error   If non-null, the error message when PME is not supported on GPU.
  *
  * \returns true if PME can run on GPU on this build, false otherwise.
  */
-bool pme_gpu_supports_build(std::string *error);
+bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
+                            std::string         *error);
 
 /*! \brief Checks whether the input system allows to run PME on GPU.
  * TODO: this partly duplicates an internal PME assert function
diff --git a/src/gromacs/ewald/tests/pmegathertest.cpp b/src/gromacs/ewald/tests/pmegathertest.cpp
index 3cd1ede5b9..41d7fdc0c1 100644
--- a/src/gromacs/ewald/tests/pmegathertest.cpp
+++ b/src/gromacs/ewald/tests/pmegathertest.cpp
@@ -389,7 +389,7 @@ class PmeGatherTest : public ::testing::TestWithParam<GatherInputParameters>
             for (const auto &context : getPmeTestEnv()->getHardwareContexts())
             {
                 CodePath   codePath       = context->getCodePath();
-                const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+                const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
                 if (!supportedInput)
                 {
                     /* Testing the failure for the unsupported input */
diff --git a/src/gromacs/ewald/tests/pmesolvetest.cpp b/src/gromacs/ewald/tests/pmesolvetest.cpp
index b8355ac67d..fed065a220 100644
--- a/src/gromacs/ewald/tests/pmesolvetest.cpp
+++ b/src/gromacs/ewald/tests/pmesolvetest.cpp
@@ -112,7 +112,7 @@ class PmeSolveTest : public ::testing::TestWithParam<SolveInputParameters>
             for (const auto &context : getPmeTestEnv()->getHardwareContexts())
             {
                 CodePath   codePath       = context->getCodePath();
-                const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+                const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
                 if (!supportedInput)
                 {
                     /* Testing the failure for the unsupported input */
diff --git a/src/gromacs/ewald/tests/pmesplinespreadtest.cpp b/src/gromacs/ewald/tests/pmesplinespreadtest.cpp
index 3e593791bf..d8c5beb645 100644
--- a/src/gromacs/ewald/tests/pmesplinespreadtest.cpp
+++ b/src/gromacs/ewald/tests/pmesplinespreadtest.cpp
@@ -123,7 +123,7 @@ class PmeSplineAndSpreadTest : public ::testing::TestWithParam<SplineAndSpreadIn
             for (const auto &context : getPmeTestEnv()->getHardwareContexts())
             {
                 CodePath   codePath       = context->getCodePath();
-                const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+                const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
                 if (!supportedInput)
                 {
                     /* Testing the failure for the unsupported input */
diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp
index fe0a4fabf1..3f600572fe 100644
--- a/src/gromacs/ewald/tests/pmetestcommon.cpp
+++ b/src/gromacs/ewald/tests/pmetestcommon.cpp
@@ -70,7 +70,9 @@ namespace gmx
 namespace test
 {
 
-bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode)
+bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
+                             const t_inputrec    *inputRec,
+                             CodePath             mode)
 {
     bool       implemented;
     gmx_mtop_t mtop;
@@ -81,7 +83,7 @@ bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode)
             break;
 
         case CodePath::GPU:
-            implemented = (pme_gpu_supports_build(nullptr) &&
+            implemented = (pme_gpu_supports_build(hwinfo, nullptr) &&
                            pme_gpu_supports_input(*inputRec, mtop, nullptr));
             break;
 
diff --git a/src/gromacs/ewald/tests/pmetestcommon.h b/src/gromacs/ewald/tests/pmetestcommon.h
index d3e9696b2c..fc714696dd 100644
--- a/src/gromacs/ewald/tests/pmetestcommon.h
+++ b/src/gromacs/ewald/tests/pmetestcommon.h
@@ -106,7 +106,9 @@ typedef std::tuple<real, Matrix3x3> PmeSolveOutput;
 // Misc.
 
 //! Tells if this generally valid PME input is supported for this mode
-bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode);
+bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
+                             const t_inputrec    *inputRec,
+                             CodePath             mode);
 
 //! Spline moduli are computed in double precision, so they're very good in single precision
 constexpr int64_t c_splineModuliSinglePrecisionUlps = 1;
diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.cpp b/src/gromacs/ewald/tests/testhardwarecontexts.cpp
index 3e9b19add6..b4486fc7ca 100644
--- a/src/gromacs/ewald/tests/testhardwarecontexts.cpp
+++ b/src/gromacs/ewald/tests/testhardwarecontexts.cpp
@@ -47,6 +47,7 @@
 #include "gromacs/compat/make_unique.h"
 #include "gromacs/ewald/pme.h"
 #include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/utility/basenetwork.h"
 #include "gromacs/utility/exceptions.h"
@@ -111,7 +112,7 @@ void PmeTestEnvironment::SetUp()
     hardwareContexts_.emplace_back(compat::make_unique<TestHardwareContext>(CodePath::CPU, "", nullptr));
 
     hardwareInfo_ = hardwareInit();
-    if (!pme_gpu_supports_build(nullptr))
+    if (!pme_gpu_supports_build(*hardwareInfo_, nullptr))
     {
         // PME can only run on the CPU, so don't make any more test contexts.
         return;
diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.h b/src/gromacs/ewald/tests/testhardwarecontexts.h
index 4d39c755c9..f364c466d4 100644
--- a/src/gromacs/ewald/tests/testhardwarecontexts.h
+++ b/src/gromacs/ewald/tests/testhardwarecontexts.h
@@ -49,9 +49,10 @@
 #include <gtest/gtest.h>
 
 #include "gromacs/ewald/pme-gpu-program.h"
-#include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 
+struct gmx_hw_info_t;
+
 namespace gmx
 {
 namespace test
@@ -118,6 +119,8 @@ class PmeTestEnvironment : public ::testing::Environment
         void TearDown() override;
         //! Get available hardware contexts.
         const TestHardwareContexts &getHardwareContexts() const {return hardwareContexts_; }
+        //! Get available hardware information.
+        const gmx_hw_info_t *hwinfo() const { return hardwareInfo_; }
 };
 
 //! Get the test environment
diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h
index ace93a5e3a..69e8ee410c 100644
--- a/src/gromacs/gpu_utils/gpu_utils.h
+++ b/src/gromacs/gpu_utils/gpu_utils.h
@@ -203,6 +203,21 @@ void get_gpu_device_info_string(char *GPU_FUNC_ARGUMENT(s),
                                 const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
                                 int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM
 
+/*! \brief Returns whether all compatible OpenCL devices are from AMD.
+ *
+ * This is currently the most useful and best tested platform for
+ * supported OpenCL devices, so some modules may need to check what
+ * degree of support they should offer.
+ *
+ * \todo An enumeration visible in the hardware module would make such
+ * checks more configurable, if we discover other needs in future.
+ *
+ * \returns whether all detected compatible devices have AMD for the vendor.
+ */
+OPENCL_FUNC_QUALIFIER
+bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &OPENCL_FUNC_ARGUMENT(gpuInfo))
+OPENCL_FUNC_TERM_WITH_RETURN(false)
+
 /*! \brief Returns the size of the gpu_dev_info struct.
  *
  * The size of gpu_dev_info can be used for allocation and communication.
diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
index 6f5f631523..a9fea0d284 100644
--- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
+++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp
@@ -384,6 +384,21 @@ void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int ind
     }
 }
 
+bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &gpuInfo) // true unless a compatible non-AMD device is found
+{
+    bool result = true; // remains true when n_dev is 0 or no device is marked compatible
+    for (int i = 0; i < gpuInfo.n_dev; ++i)
+    {
+        if ((gpuInfo.gpu_dev[i].stat == egpuCompatible) && // devices with any other status are ignored
+            (gpuInfo.gpu_dev[i].vendor_e != OCL_VENDOR_AMD))
+        {
+            result = false; // a compatible device from a vendor other than AMD was found
+            break;          // one such device decides the answer, so stop scanning
+        }
+    }
+    return result;
+}
+
 //! This function is documented in the header file
 void init_gpu(const gmx_device_info_t *deviceInfo)
 {
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp
index 6377d0e792..6b5398b449 100644
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -619,7 +619,7 @@ int Mdrunner::mdrunner()
                     inputrec->cutoff_scheme == ecutsVERLET,
                     gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
                     hw_opt.nthreads_tmpi);
-            auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
+            auto canUseGpuForPme   = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
                     canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
@@ -687,7 +687,7 @@ int Mdrunner::mdrunner()
                                                                 emulateGpuNonbonded, usingVerletScheme,
                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
                                                                 gpusWereDetected);
-        auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
+        auto canUseGpuForPme   = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
                                                     canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
                                                     gpusWereDetected);
diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp
index ae9c22026d..5fa35b34eb 100644
--- a/src/gromacs/taskassignment/resourcedivision.cpp
+++ b/src/gromacs/taskassignment/resourcedivision.cpp
@@ -356,7 +356,7 @@ int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
     if (pmeOnGpu)
     {
         GMX_RELEASE_ASSERT((EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) &&
-                           pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr),
+                           pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr),
                            "PME can't be on GPUs unless we are using PME");
 
         // PME on GPUs supports a single PME rank with PP running on the same or few other ranks.
diff --git a/src/programs/mdrun/tests/pmetest.cpp b/src/programs/mdrun/tests/pmetest.cpp
index 9462633d92..c2c734c53d 100644
--- a/src/programs/mdrun/tests/pmetest.cpp
+++ b/src/programs/mdrun/tests/pmetest.cpp
@@ -54,11 +54,15 @@
 
 #include <gtest/gtest-spi.h>
 
+#include "gromacs/ewald/pme.h"
 #include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/trajectory/energyframe.h"
 #include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/loggerbuilder.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
 #include "gromacs/utility/stringutil.h"
 
 #include "testutils/mpitest.h"
@@ -127,6 +131,11 @@ void PmeTest::runTest(const RunModesList &runModes)
     {
         EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks
     }
+
+    auto hardwareInfo_ = gmx_detect_hardware(MDLogger {},
+                                             PhysicalNodeCommunicator(MPI_COMM_WORLD,
+                                                                      gmx_physicalnode_id_hash()));
+
     for (const auto &mode : runModes)
     {
         auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos);
@@ -137,6 +146,14 @@ void PmeTest::runTest(const RunModesList &runModes)
             // to test here.
             continue;
         }
+        auto modeTargetsPmeOnGpus = (mode.first.find("PmeOnGpu") != std::string::npos);
+        if (modeTargetsPmeOnGpus && !pme_gpu_supports_build(*hardwareInfo_, nullptr))
+        {
+            // This run mode will cause a fatal error from mdrun when
+            // it finds an unsuitable device, which is not something
+            // we're trying to test here.
+            continue;
+        }
 
         runner_.edrFileName_ = fileManager_.getTemporaryFilePath(inputFile + "_" + mode.first + ".edr");