Make PME OpenCL enabled only for AMD devices
author Mark Abraham <mark.j.abraham@gmail.com>
Mon, 5 Nov 2018 17:53:03 +0000 (18:53 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Mon, 5 Nov 2018 23:46:54 +0000 (00:46 +0100)
Other vendor devices have known issues, but fixes
are not yet complete.

Refs #2702, #2719

Change-Id: I0d443229ffe4cee3bb4029f57502f9c7fba2574d

16 files changed:
docs/release-notes/2019/major/highlights.rst
docs/user-guide/mdrun-performance.rst
src/gromacs/ewald/pme.cpp
src/gromacs/ewald/pme.h
src/gromacs/ewald/tests/pmegathertest.cpp
src/gromacs/ewald/tests/pmesolvetest.cpp
src/gromacs/ewald/tests/pmesplinespreadtest.cpp
src/gromacs/ewald/tests/pmetestcommon.cpp
src/gromacs/ewald/tests/pmetestcommon.h
src/gromacs/ewald/tests/testhardwarecontexts.cpp
src/gromacs/ewald/tests/testhardwarecontexts.h
src/gromacs/gpu_utils/gpu_utils.h
src/gromacs/gpu_utils/gpu_utils_ocl.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/taskassignment/resourcedivision.cpp
src/programs/mdrun/tests/pmetest.cpp

index 38746ec0c970e342657f96aad4e485088b7d1a58..4007329bfae79243bb01ae68f8e6c7f2916f71d5 100644 (file)
@@ -16,7 +16,7 @@ simulations and hardware. They are:
   include both constraints and virtual sites. This improves performance
   by eliminating overheads during the update, at no cost.
 * Intel integrated GPUs are now supported with OpenCL.
-* PME long-ranged interactions can now also run on a single GPU using
-  OpenCL, which  means many fewer CPU cores are needed for good
+* PME long-ranged interactions can now also run on a single AMD GPU
+  using OpenCL, which means many fewer CPU cores are needed for good
   performance with such hardware.
 * TODO Other stuff
index c995e4ec36d1c494f77f3a30a3cb89c6fa7218be..c15b3e3260f7470cd1b4c06d4dfe2bf1059a2430 100644 (file)
@@ -1049,6 +1049,8 @@ Limitations in the current OpenCL support of interest to |Gromacs| users:
 - On NVIDIA GPUs the OpenCL kernels achieve much lower performance
   than the equivalent CUDA kernels due to limitations of the NVIDIA OpenCL
   compiler.
+- PME is currently only supported on AMD devices, because of known
+  issues with devices from other vendors.
 
 Limitations of interest to |Gromacs| developers:
 
index 717eadbfad9453d3e300af6466410df170772e1c..b10c31d3e8e86d8f5b2c3b3743f763790492ce9f 100644 (file)
@@ -88,6 +88,7 @@
 #include "gromacs/fileio/pdbio.h"
 #include "gromacs/gmxlib/network.h"
 #include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/hardware/hw_info.h"
 #include "gromacs/math/gmxcomplex.h"
 #include "gromacs/math/invertmatrix.h"
 #include "gromacs/math/units.h"
 #include "gromacs/timing/walltime_accounting.h"
 #include "gromacs/topology/topology.h"
 #include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxmpi.h"
@@ -141,7 +143,8 @@ addMessageIfNotSupported(const std::list<std::string> &errorReasons,
     return foundErrorReasons;
 }
 
-bool pme_gpu_supports_build(std::string *error)
+bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
+                            std::string         *error)
 {
     std::list<std::string> errorReasons;
     if (GMX_DOUBLE)
@@ -152,6 +155,13 @@ bool pme_gpu_supports_build(std::string *error)
     {
         errorReasons.emplace_back("non-GPU build of GROMACS");
     }
+    if (GMX_GPU == GMX_GPU_OPENCL)
+    {
+        if (!areAllGpuDevicesFromAmd(hwinfo.gpu_info))
+        {
+            errorReasons.emplace_back("only AMD devices are supported");
+        }
+    }
     return addMessageIfNotSupported(errorReasons, error);
 }
 
index ccf5e7227d929cd34b2f63733eeb4dd827abfbc9..1e47dfc5afd677b27d03f11780c84cb8668b9b24 100644 (file)
@@ -57,6 +57,7 @@
 #include "gromacs/utility/basedefinitions.h"
 #include "gromacs/utility/real.h"
 
+struct gmx_hw_info_t;
 struct interaction_const_t;
 struct t_commrec;
 struct t_inputrec;
@@ -250,11 +251,13 @@ void gmx_pme_reinit_atoms(const gmx_pme_t *pme, int nAtoms, const real *charges)
  * pme_gpu_check_restrictions(), except that works with a
  * formed gmx_pme_t structure. Should that one go away/work with inputrec?
  *
- * \param[out] error  If non-null, the error message when PME is not supported on GPU.
+ * \param[in]  hwinfo  Information about the detected hardware
+ * \param[out] error   If non-null, the error message when PME is not supported on GPU.
  *
  * \returns true if PME can run on GPU on this build, false otherwise.
  */
-bool pme_gpu_supports_build(std::string *error);
+bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
+                            std::string         *error);
 
 /*! \brief Checks whether the input system allows to run PME on GPU.
  * TODO: this partly duplicates an internal PME assert function
index 3cd1ede5b90778e68b0e245ac37bd0129443a8f1..41d7fdc0c12f9559023c222ff1c1f0cb7ab15518 100644 (file)
@@ -389,7 +389,7 @@ class PmeGatherTest : public ::testing::TestWithParam<GatherInputParameters>
             for (const auto &context : getPmeTestEnv()->getHardwareContexts())
             {
                 CodePath   codePath       = context->getCodePath();
-                const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+                const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
                 if (!supportedInput)
                 {
                     /* Testing the failure for the unsupported input */
index b8355ac67d1102954e4e1966d6f0ad9b1ae5b5e4..fed065a220ed9078bc52a4f7cd490cfb19bd26e6 100644 (file)
@@ -112,7 +112,7 @@ class PmeSolveTest : public ::testing::TestWithParam<SolveInputParameters>
             for (const auto &context : getPmeTestEnv()->getHardwareContexts())
             {
                 CodePath   codePath       = context->getCodePath();
-                const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+                const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
                 if (!supportedInput)
                 {
                     /* Testing the failure for the unsupported input */
index 3e593791bf8a16cfdf9f2e100da019d0141bd6e3..d8c5beb645961731541bc6c1c2c79f35f1e19375 100644 (file)
@@ -123,7 +123,7 @@ class PmeSplineAndSpreadTest : public ::testing::TestWithParam<SplineAndSpreadIn
             for (const auto &context : getPmeTestEnv()->getHardwareContexts())
             {
                 CodePath   codePath       = context->getCodePath();
-                const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+                const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
                 if (!supportedInput)
                 {
                     /* Testing the failure for the unsupported input */
index fe0a4fabf18f1726e5d19da9e8ddc82177813610..3f600572fe0b7d5f3f6bb38e51053347b58c4abf 100644 (file)
@@ -70,7 +70,9 @@ namespace gmx
 namespace test
 {
 
-bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode)
+bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
+                             const t_inputrec    *inputRec,
+                             CodePath             mode)
 {
     bool       implemented;
     gmx_mtop_t mtop;
@@ -81,7 +83,7 @@ bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode)
             break;
 
         case CodePath::GPU:
-            implemented = (pme_gpu_supports_build(nullptr) &&
+            implemented = (pme_gpu_supports_build(hwinfo, nullptr) &&
                            pme_gpu_supports_input(*inputRec, mtop, nullptr));
             break;
 
index d3e9696b2cbc62b99add86eb0a8757bac225d0fe..fc714696dd4b2d11a0ec22872f8e7fab05c79fc9 100644 (file)
@@ -106,7 +106,9 @@ typedef std::tuple<real, Matrix3x3> PmeSolveOutput;
 // Misc.
 
 //! Tells if this generally valid PME input is supported for this mode
-bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode);
+bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
+                             const t_inputrec    *inputRec,
+                             CodePath             mode);
 
 //! Spline moduli are computed in double precision, so they're very good in single precision
 constexpr int64_t c_splineModuliSinglePrecisionUlps = 1;
index 3e9b19add6e60a4584c7e378710b915c2a7234ab..b4486fc7caa3d467153ce6f1ea6a170b3d8f7771 100644 (file)
@@ -47,6 +47,7 @@
 #include "gromacs/compat/make_unique.h"
 #include "gromacs/ewald/pme.h"
 #include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/hw_info.h"
 #include "gromacs/utility/basenetwork.h"
 #include "gromacs/utility/exceptions.h"
@@ -111,7 +112,7 @@ void PmeTestEnvironment::SetUp()
     hardwareContexts_.emplace_back(compat::make_unique<TestHardwareContext>(CodePath::CPU, "", nullptr));
 
     hardwareInfo_ = hardwareInit();
-    if (!pme_gpu_supports_build(nullptr))
+    if (!pme_gpu_supports_build(*hardwareInfo_, nullptr))
     {
         // PME can only run on the CPU, so don't make any more test contexts.
         return;
index 4d39c755c9e7a3c8ee0322b8b7da96b79a174202..f364c466d4fb3f93e6ad921e6483f984e05b99e5 100644 (file)
 #include <gtest/gtest.h>
 
 #include "gromacs/ewald/pme-gpu-program.h"
-#include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 
+struct gmx_hw_info_t;
+
 namespace gmx
 {
 namespace test
@@ -118,6 +119,8 @@ class PmeTestEnvironment : public ::testing::Environment
         void TearDown() override;
         //! Get available hardware contexts.
         const TestHardwareContexts &getHardwareContexts() const {return hardwareContexts_; }
+        //! Get available hardware information.
+        const gmx_hw_info_t *hwinfo() const { return hardwareInfo_; }
 };
 
 //! Get the test environment
index ace93a5e3a6f01e4a9d986caf414777e62318a81..69e8ee410cb2ca205c01049f021f68b00410a10f 100644 (file)
@@ -203,6 +203,21 @@ void get_gpu_device_info_string(char *GPU_FUNC_ARGUMENT(s),
                                 const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
                                 int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM
 
+/*! \brief Returns whether all compatible OpenCL devices are from AMD.
+ *
+ * This is currently the most useful and best tested platform for
+ * supported OpenCL devices, so some modules may need to check what
+ * degree of support they should offer.
+ *
+ * \todo An enumeration visible in the hardware module would make such
+ * checks more configurable, if we discover other needs in future.
+ *
+ * \returns whether all detected compatible devices have AMD for the vendor.
+ */
+OPENCL_FUNC_QUALIFIER
+bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &OPENCL_FUNC_ARGUMENT(gpuInfo))
+OPENCL_FUNC_TERM_WITH_RETURN(false)
+
 /*! \brief Returns the size of the gpu_dev_info struct.
  *
  * The size of gpu_dev_info can be used for allocation and communication.
index 6f5f6315233ac3ef1251973622e570f1a258a82f..a9fea0d2840cef5c4b59761d34e7d2007c55e087 100644 (file)
@@ -384,6 +384,21 @@ void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int ind
     }
 }
 
+bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &gpuInfo)
+{
+    bool result = true;
+    for (int i = 0; i < gpuInfo.n_dev; ++i)
+    {
+        if ((gpuInfo.gpu_dev[i].stat == egpuCompatible) &&
+            (gpuInfo.gpu_dev[i].vendor_e != OCL_VENDOR_AMD))
+        {
+            result = false;
+            break;
+        }
+    }
+    return result;
+}
+
 //! This function is documented in the header file
 void init_gpu(const gmx_device_info_t *deviceInfo)
 {
index 6377d0e792ad2a2fa83bfdc44498bf9f598a670d..6b5398b449f94a008b40739cb9d81012838f6f7a 100644 (file)
@@ -619,7 +619,7 @@ int Mdrunner::mdrunner()
                     inputrec->cutoff_scheme == ecutsVERLET,
                     gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
                     hw_opt.nthreads_tmpi);
-            auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
+            auto canUseGpuForPme   = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
                     canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
@@ -687,7 +687,7 @@ int Mdrunner::mdrunner()
                                                                 emulateGpuNonbonded, usingVerletScheme,
                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
                                                                 gpusWereDetected);
-        auto canUseGpuForPme   = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
+        auto canUseGpuForPme   = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
                                                     canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
                                                     gpusWereDetected);
index ae9c22026d5c182f3360e9b1a8e981f2139568ab..5fa35b34eb1ede13742df5a94d322535ed7153b6 100644 (file)
@@ -356,7 +356,7 @@ int get_nthreads_mpi(const gmx_hw_info_t    *hwinfo,
     if (pmeOnGpu)
     {
         GMX_RELEASE_ASSERT((EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) &&
-                           pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr),
+                           pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr),
                            "PME can't be on GPUs unless we are using PME");
 
         // PME on GPUs supports a single PME rank with PP running on the same or few other ranks.
index 9462633d92ef67f1a204de63dc36ae3c902a4f15..c2c734c53d049c2f4be81f49cb83dca96739eeab 100644 (file)
 
 #include <gtest/gtest-spi.h>
 
+#include "gromacs/ewald/pme.h"
 #include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/gpu_hw_info.h"
 #include "gromacs/trajectory/energyframe.h"
 #include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/loggerbuilder.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
 #include "gromacs/utility/stringutil.h"
 
 #include "testutils/mpitest.h"
@@ -127,6 +131,11 @@ void PmeTest::runTest(const RunModesList &runModes)
     {
         EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks
     }
+
+    auto hardwareInfo_ = gmx_detect_hardware(MDLogger {},
+                                             PhysicalNodeCommunicator(MPI_COMM_WORLD,
+                                                                      gmx_physicalnode_id_hash()));
+
     for (const auto &mode : runModes)
     {
         auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos);
@@ -137,6 +146,14 @@ void PmeTest::runTest(const RunModesList &runModes)
             // to test here.
             continue;
         }
+        auto modeTargetsPmeOnGpus = (mode.first.find("PmeOnGpu") != std::string::npos);
+        if (modeTargetsPmeOnGpus && !pme_gpu_supports_build(*hardwareInfo_, nullptr))
+        {
+            // This run mode will cause a fatal error from mdrun when
+            // it finds an unsuitable device, which is not something
+            // we're trying to test here.
+            continue;
+        }
 
         runner_.edrFileName_ = fileManager_.getTemporaryFilePath(inputFile + "_" + mode.first + ".edr");