From: Mark Abraham Date: Mon, 5 Nov 2018 17:53:03 +0000 (+0100) Subject: Make PME OpenCL enabled only for AMD devices X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=577b4d2369c8dd8098f96adcc98caae2d3d8ed6a;p=alexxy%2Fgromacs.git Make PME OpenCL enabled only for AMD devices Other vendor devices have known issues, but fixes are not yet complete. Refs #2702, #2719 Change-Id: I0d443229ffe4cee3bb4029f57502f9c7fba2574d --- diff --git a/docs/release-notes/2019/major/highlights.rst b/docs/release-notes/2019/major/highlights.rst index 38746ec0c9..4007329bfa 100644 --- a/docs/release-notes/2019/major/highlights.rst +++ b/docs/release-notes/2019/major/highlights.rst @@ -16,7 +16,7 @@ simulations and hardware. They are: include both constraints and virtual sites. This improves performance by eliminating overheads during the update, at no cost. * Intel integrated GPUs are now supported with OpenCL. -* PME long-ranged interactions can now also run on a single GPU using - OpenCL, which means many fewer CPU cores are needed for good +* PME long-ranged interactions can now also run on a single AMD GPU + using OpenCL, which means many fewer CPU cores are needed for good performance with such hardware. * TODO Other stuff diff --git a/docs/user-guide/mdrun-performance.rst b/docs/user-guide/mdrun-performance.rst index c995e4ec36..c15b3e3260 100644 --- a/docs/user-guide/mdrun-performance.rst +++ b/docs/user-guide/mdrun-performance.rst @@ -1049,6 +1049,8 @@ Limitations in the current OpenCL support of interest to |Gromacs| users: - On NVIDIA GPUs the OpenCL kernels achieve much lower performance than the equivalent CUDA kernels due to limitations of the NVIDIA OpenCL compiler. +- PME is currently only supported on AMD devices, because of known + issues with devices from other vendors Limitations of interest to |Gromacs| developers: diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp index 717eadbfad..b10c31d3e8 100644 --- a/src/gromacs/ewald/pme.cpp +++ b/src/gromacs/ewald/pme.cpp @@ -88,6 +88,7 @@ #include "gromacs/fileio/pdbio.h" #include "gromacs/gmxlib/network.h" #include "gromacs/gmxlib/nrnb.h" +#include "gromacs/hardware/hw_info.h" #include "gromacs/math/gmxcomplex.h" #include "gromacs/math/invertmatrix.h" #include "gromacs/math/units.h" @@ -103,6 +104,7 @@ #include "gromacs/timing/walltime_accounting.h" #include "gromacs/topology/topology.h" #include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/cstringutil.h" #include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxmpi.h" @@ -141,7 +143,8 @@ addMessageIfNotSupported(const std::list &errorReasons, return foundErrorReasons; } -bool pme_gpu_supports_build(std::string *error) +bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo, + std::string *error) { std::list errorReasons; if (GMX_DOUBLE) @@ -152,6 +155,13 @@ bool pme_gpu_supports_build(std::string *error) { errorReasons.emplace_back("non-GPU build of GROMACS"); } + if (GMX_GPU == GMX_GPU_OPENCL) + { + if (!areAllGpuDevicesFromAmd(hwinfo.gpu_info)) + { + errorReasons.emplace_back("only AMD devices are supported"); + } + } return addMessageIfNotSupported(errorReasons, error); } diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h index ccf5e7227d..1e47dfc5af 100644 --- a/src/gromacs/ewald/pme.h +++ b/src/gromacs/ewald/pme.h @@ -57,6 +57,7 @@ #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/real.h" +struct gmx_hw_info_t; struct interaction_const_t; struct t_commrec; struct t_inputrec; @@ -250,11 +251,13 @@ void gmx_pme_reinit_atoms(const gmx_pme_t *pme, int nAtoms, const real *charges) * pme_gpu_check_restrictions(), except that works with a * formed gmx_pme_t structure. Should that one go away/work with inputrec? * - * \param[out] error If non-null, the error message when PME is not supported on GPU. + * \param[in] hwinfo Information about the detected hardware + * \param[out] error If non-null, the error message when PME is not supported on GPU. * * \returns true if PME can run on GPU on this build, false otherwise. */ -bool pme_gpu_supports_build(std::string *error); +bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo, + std::string *error); /*! \brief Checks whether the input system allows to run PME on GPU. * TODO: this partly duplicates an internal PME assert function diff --git a/src/gromacs/ewald/tests/pmegathertest.cpp b/src/gromacs/ewald/tests/pmegathertest.cpp index 3cd1ede5b9..41d7fdc0c1 100644 --- a/src/gromacs/ewald/tests/pmegathertest.cpp +++ b/src/gromacs/ewald/tests/pmegathertest.cpp @@ -389,7 +389,7 @@ class PmeGatherTest : public ::testing::TestWithParam for (const auto &context : getPmeTestEnv()->getHardwareContexts()) { CodePath codePath = context->getCodePath(); - const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath); + const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath); if (!supportedInput) { /* Testing the failure for the unsupported input */ diff --git a/src/gromacs/ewald/tests/pmesolvetest.cpp b/src/gromacs/ewald/tests/pmesolvetest.cpp index b8355ac67d..fed065a220 100644 --- a/src/gromacs/ewald/tests/pmesolvetest.cpp +++ b/src/gromacs/ewald/tests/pmesolvetest.cpp @@ -112,7 +112,7 @@ class PmeSolveTest : public ::testing::TestWithParam for (const auto &context : getPmeTestEnv()->getHardwareContexts()) { CodePath codePath = context->getCodePath(); - const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath); + const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath); if (!supportedInput) { /* Testing the failure for the unsupported input */ diff --git a/src/gromacs/ewald/tests/pmesplinespreadtest.cpp b/src/gromacs/ewald/tests/pmesplinespreadtest.cpp index 3e593791bf..d8c5beb645 100644 --- a/src/gromacs/ewald/tests/pmesplinespreadtest.cpp +++ b/src/gromacs/ewald/tests/pmesplinespreadtest.cpp @@ -123,7 +123,7 @@ class PmeSplineAndSpreadTest : public ::testing::TestWithParamgetHardwareContexts()) { CodePath codePath = context->getCodePath(); - const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath); + const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath); if (!supportedInput) { /* Testing the failure for the unsupported input */ diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp index fe0a4fabf1..3f600572fe 100644 --- a/src/gromacs/ewald/tests/pmetestcommon.cpp +++ b/src/gromacs/ewald/tests/pmetestcommon.cpp @@ -70,7 +70,9 @@ namespace gmx namespace test { -bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode) +bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo, + const t_inputrec *inputRec, + CodePath mode) { bool implemented; gmx_mtop_t mtop; @@ -81,7 +83,7 @@ bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode) break; case CodePath::GPU: - implemented = (pme_gpu_supports_build(nullptr) && + implemented = (pme_gpu_supports_build(hwinfo, nullptr) && pme_gpu_supports_input(*inputRec, mtop, nullptr)); break; diff --git a/src/gromacs/ewald/tests/pmetestcommon.h b/src/gromacs/ewald/tests/pmetestcommon.h index d3e9696b2c..fc714696dd 100644 --- a/src/gromacs/ewald/tests/pmetestcommon.h +++ b/src/gromacs/ewald/tests/pmetestcommon.h @@ -106,7 +106,9 @@ typedef std::tuple PmeSolveOutput; // Misc. //! Tells if this generally valid PME input is supported for this mode -bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode); +bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo, + const t_inputrec *inputRec, + CodePath mode); //! Spline moduli are computed in double precision, so they're very good in single precision constexpr int64_t c_splineModuliSinglePrecisionUlps = 1; diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.cpp b/src/gromacs/ewald/tests/testhardwarecontexts.cpp index 3e9b19add6..b4486fc7ca 100644 --- a/src/gromacs/ewald/tests/testhardwarecontexts.cpp +++ b/src/gromacs/ewald/tests/testhardwarecontexts.cpp @@ -47,6 +47,7 @@ #include "gromacs/compat/make_unique.h" #include "gromacs/ewald/pme.h" #include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/hardware/detecthardware.h" #include "gromacs/hardware/hw_info.h" #include "gromacs/utility/basenetwork.h" #include "gromacs/utility/exceptions.h" @@ -111,7 +112,7 @@ void PmeTestEnvironment::SetUp() hardwareContexts_.emplace_back(compat::make_unique(CodePath::CPU, "", nullptr)); hardwareInfo_ = hardwareInit(); - if (!pme_gpu_supports_build(nullptr)) + if (!pme_gpu_supports_build(*hardwareInfo_, nullptr)) { // PME can only run on the CPU, so don't make any more test contexts. return; diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.h b/src/gromacs/ewald/tests/testhardwarecontexts.h index 4d39c755c9..f364c466d4 100644 --- a/src/gromacs/ewald/tests/testhardwarecontexts.h +++ b/src/gromacs/ewald/tests/testhardwarecontexts.h @@ -49,9 +49,10 @@ #include #include "gromacs/ewald/pme-gpu-program.h" -#include "gromacs/hardware/detecthardware.h" #include "gromacs/hardware/gpu_hw_info.h" +struct gmx_hw_info_t; + namespace gmx { namespace test @@ -118,6 +119,8 @@ class PmeTestEnvironment : public ::testing::Environment void TearDown() override; //! Get available hardware contexts. const TestHardwareContexts &getHardwareContexts() const {return hardwareContexts_; } + //! Get available hardware information. + const gmx_hw_info_t *hwinfo() const { return hardwareInfo_; } }; //! Get the test environment diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h index ace93a5e3a..69e8ee410c 100644 --- a/src/gromacs/gpu_utils/gpu_utils.h +++ b/src/gromacs/gpu_utils/gpu_utils.h @@ -203,6 +203,21 @@ void get_gpu_device_info_string(char *GPU_FUNC_ARGUMENT(s), const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info), int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM +/*! \brief Returns whether all compatible OpenCL devices are from AMD. + * + * This is currently the most useful and best tested platform for + * supported OpenCL devices, so some modules may need to check what + * degree of support they should offer. + * + * \todo An enumeration visible in the hardware module would make such + * checks more configurable, if we discover other needs in future. + * + * \returns whether all detected compatible devices have AMD for the vendor. + */ +OPENCL_FUNC_QUALIFIER +bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &OPENCL_FUNC_ARGUMENT(gpuInfo)) +OPENCL_FUNC_TERM_WITH_RETURN(false) + /*! \brief Returns the size of the gpu_dev_info struct. * * The size of gpu_dev_info can be used for allocation and communication. diff --git a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp index 6f5f631523..a9fea0d284 100644 --- a/src/gromacs/gpu_utils/gpu_utils_ocl.cpp +++ b/src/gromacs/gpu_utils/gpu_utils_ocl.cpp @@ -384,6 +384,21 @@ void get_gpu_device_info_string(char *s, const gmx_gpu_info_t &gpu_info, int ind } } +bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &gpuInfo) +{ + bool result = true; + for (int i = 0; i < gpuInfo.n_dev; ++i) + { + if ((gpuInfo.gpu_dev[i].stat == egpuCompatible) && + (gpuInfo.gpu_dev[i].vendor_e != OCL_VENDOR_AMD)) + { + result = false; + break; + } + } + return result; +} + //! This function is documented in the header file void init_gpu(const gmx_device_info_t *deviceInfo) { diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 6377d0e792..6b5398b449 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -619,7 +619,7 @@ int Mdrunner::mdrunner() inputrec->cutoff_scheme == ecutsVERLET, gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI), hw_opt.nthreads_tmpi); - auto canUseGpuForPme = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr); + auto canUseGpuForPme = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr); useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks); @@ -687,7 +687,7 @@ int Mdrunner::mdrunner() emulateGpuNonbonded, usingVerletScheme, gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI), gpusWereDetected); - auto canUseGpuForPme = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr); + auto canUseGpuForPme = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr); useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks, gpusWereDetected); diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp index ae9c22026d..5fa35b34eb 100644 --- a/src/gromacs/taskassignment/resourcedivision.cpp +++ b/src/gromacs/taskassignment/resourcedivision.cpp @@ -356,7 +356,7 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo, if (pmeOnGpu) { GMX_RELEASE_ASSERT((EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) && - pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr), + pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr), "PME can't be on GPUs unless we are using PME"); // PME on GPUs supports a single PME rank with PP running on the same or few other ranks. diff --git a/src/programs/mdrun/tests/pmetest.cpp b/src/programs/mdrun/tests/pmetest.cpp index 9462633d92..c2c734c53d 100644 --- a/src/programs/mdrun/tests/pmetest.cpp +++ b/src/programs/mdrun/tests/pmetest.cpp @@ -54,11 +54,15 @@ #include +#include "gromacs/ewald/pme.h" #include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/hardware/detecthardware.h" #include "gromacs/hardware/gpu_hw_info.h" #include "gromacs/trajectory/energyframe.h" #include "gromacs/utility/cstringutil.h" #include "gromacs/utility/gmxmpi.h" +#include "gromacs/utility/loggerbuilder.h" +#include "gromacs/utility/physicalnodecommunicator.h" #include "gromacs/utility/stringutil.h" #include "testutils/mpitest.h" @@ -127,6 +131,11 @@ void PmeTest::runTest(const RunModesList &runModes) { EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks } + + auto hardwareInfo_ = gmx_detect_hardware(MDLogger {}, + PhysicalNodeCommunicator(MPI_COMM_WORLD, + gmx_physicalnode_id_hash())); + for (const auto &mode : runModes) { auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos); @@ -137,6 +146,14 @@ void PmeTest::runTest(const RunModesList &runModes) // to test here. continue; } + auto modeTargetsPmeOnGpus = (mode.first.find("PmeOnGpu") != std::string::npos); + if (modeTargetsPmeOnGpus && !pme_gpu_supports_build(*hardwareInfo_, nullptr)) + { + // This run mode will cause a fatal error from mdrun when + // it finds an unsuitable device, which is not something + // we're trying to test here. + continue; + } runner_.edrFileName_ = fileManager_.getTemporaryFilePath(inputFile + "_" + mode.first + ".edr");