include both constraints and virtual sites. This improves performance
by eliminating overheads during the update, at no cost.
* Intel integrated GPUs are now supported with OpenCL.
-* PME long-ranged interactions can now also run on a single GPU using
- OpenCL, which means many fewer CPU cores are needed for good
+* PME long-ranged interactions can now also run on a single AMD GPU
+ using OpenCL, which means many fewer CPU cores are needed for good
performance with such hardware.
* TODO Other stuff
- On NVIDIA GPUs the OpenCL kernels achieve much lower performance
than the equivalent CUDA kernels due to limitations of the NVIDIA OpenCL
compiler.
+- PME is currently only supported on AMD devices, because of known
+ issues with devices from other vendors.
Limitations of interest to |Gromacs| developers:
#include "gromacs/fileio/pdbio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/hardware/hw_info.h"
#include "gromacs/math/gmxcomplex.h"
#include "gromacs/math/invertmatrix.h"
#include "gromacs/math/units.h"
#include "gromacs/timing/walltime_accounting.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
return foundErrorReasons;
}
-bool pme_gpu_supports_build(std::string *error)
+bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
+ std::string *error)
{
std::list<std::string> errorReasons;
if (GMX_DOUBLE)
{
errorReasons.emplace_back("non-GPU build of GROMACS");
}
+ if (GMX_GPU == GMX_GPU_OPENCL) // The vendor check below applies to OpenCL builds only
+ {
+ if (!areAllGpuDevicesFromAmd(hwinfo.gpu_info)) // Any compatible non-AMD device adds an error reason
+ {
+ errorReasons.emplace_back("only AMD devices are supported");
+ }
+ }
return addMessageIfNotSupported(errorReasons, error);
}
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
+struct gmx_hw_info_t;
struct interaction_const_t;
struct t_commrec;
struct t_inputrec;
* pme_gpu_check_restrictions(), except that works with a
* formed gmx_pme_t structure. Should that one go away/work with inputrec?
*
- * \param[out] error If non-null, the error message when PME is not supported on GPU.
+ * \param[in] hwinfo Information about the detected hardware
+ * \param[out] error If non-null, the error message when PME is not supported on GPU.
*
* \returns true if PME can run on GPU on this build, false otherwise.
*/
-bool pme_gpu_supports_build(std::string *error);
+bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo,
+ std::string *error);
/*! \brief Checks whether the input system allows to run PME on GPU.
* TODO: this partly duplicates an internal PME assert function
for (const auto &context : getPmeTestEnv()->getHardwareContexts())
{
CodePath codePath = context->getCodePath();
- const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+ const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
if (!supportedInput)
{
/* Testing the failure for the unsupported input */
for (const auto &context : getPmeTestEnv()->getHardwareContexts())
{
CodePath codePath = context->getCodePath();
- const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+ const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
if (!supportedInput)
{
/* Testing the failure for the unsupported input */
for (const auto &context : getPmeTestEnv()->getHardwareContexts())
{
CodePath codePath = context->getCodePath();
- const bool supportedInput = pmeSupportsInputForMode(&inputRec, codePath);
+ const bool supportedInput = pmeSupportsInputForMode(*getPmeTestEnv()->hwinfo(), &inputRec, codePath);
if (!supportedInput)
{
/* Testing the failure for the unsupported input */
namespace test
{
-bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode)
+bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
+ const t_inputrec *inputRec,
+ CodePath mode)
{
bool implemented;
gmx_mtop_t mtop;
break;
case CodePath::GPU:
- implemented = (pme_gpu_supports_build(nullptr) &&
+ implemented = (pme_gpu_supports_build(hwinfo, nullptr) &&
pme_gpu_supports_input(*inputRec, mtop, nullptr));
break;
// Misc.
//! Tells if this generally valid PME input is supported for this mode
-bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode);
+bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
+ const t_inputrec *inputRec,
+ CodePath mode);
//! Spline moduli are computed in double precision, so they're very good in single precision
constexpr int64_t c_splineModuliSinglePrecisionUlps = 1;
#include "gromacs/compat/make_unique.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/detecthardware.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/exceptions.h"
hardwareContexts_.emplace_back(compat::make_unique<TestHardwareContext>(CodePath::CPU, "", nullptr));
hardwareInfo_ = hardwareInit();
- if (!pme_gpu_supports_build(nullptr))
+ if (!pme_gpu_supports_build(*hardwareInfo_, nullptr))
{
// PME can only run on the CPU, so don't make any more test contexts.
return;
#include <gtest/gtest.h>
#include "gromacs/ewald/pme-gpu-program.h"
-#include "gromacs/hardware/detecthardware.h"
#include "gromacs/hardware/gpu_hw_info.h"
+struct gmx_hw_info_t;
+
namespace gmx
{
namespace test
void TearDown() override;
//! Get available hardware contexts.
const TestHardwareContexts &getHardwareContexts() const {return hardwareContexts_; }
+ //! Get available hardware information.
+ const gmx_hw_info_t *hwinfo() const { return hardwareInfo_; }
};
//! Get the test environment
const gmx_gpu_info_t &GPU_FUNC_ARGUMENT(gpu_info),
int GPU_FUNC_ARGUMENT(index)) GPU_FUNC_TERM
+/*! \brief Returns whether all compatible OpenCL devices are from AMD.
+ *
+ * This is currently the most useful and best tested platform for
+ * supported OpenCL devices, so some modules may need to check what
+ * degree of support they should offer.
+ *
+ * \todo An enumeration visible in the hardware module would make such
+ * checks more configurable, if we discover other needs in future.
+ *
+ * \returns whether every device in \p gpuInfo that is marked compatible has AMD as its vendor (true when no device is marked compatible).
+ */
+OPENCL_FUNC_QUALIFIER
+bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &OPENCL_FUNC_ARGUMENT(gpuInfo))
+OPENCL_FUNC_TERM_WITH_RETURN(false)
+
/*! \brief Returns the size of the gpu_dev_info struct.
*
* The size of gpu_dev_info can be used for allocation and communication.
}
}
+bool areAllGpuDevicesFromAmd(const gmx_gpu_info_t &gpuInfo) // True iff every compatible device has AMD as its vendor
+{
+ bool result = true; // Stays true when no device is marked compatible
+ for (int i = 0; i < gpuInfo.n_dev; ++i)
+ {
+ if ((gpuInfo.gpu_dev[i].stat == egpuCompatible) && // Devices not marked compatible are ignored
+ (gpuInfo.gpu_dev[i].vendor_e != OCL_VENDOR_AMD))
+ {
+ result = false; // One compatible non-AMD device decides the answer
+ break;
+ }
+ }
+ return result;
+}
+
//! This function is documented in the header file
void init_gpu(const gmx_device_info_t *deviceInfo)
{
inputrec->cutoff_scheme == ecutsVERLET,
gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
hw_opt.nthreads_tmpi);
- auto canUseGpuForPme = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
+ auto canUseGpuForPme = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
(useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
emulateGpuNonbonded, usingVerletScheme,
gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
gpusWereDetected);
- auto canUseGpuForPme = pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
+ auto canUseGpuForPme = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr);
useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks,
gpusWereDetected);
if (pmeOnGpu)
{
GMX_RELEASE_ASSERT((EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) &&
- pme_gpu_supports_build(nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr),
+ pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr),
"PME can't be on GPUs unless we are using PME");
// PME on GPUs supports a single PME rank with PP running on the same or few other ranks.
#include <gtest/gtest-spi.h>
+#include "gromacs/ewald/pme.h"
#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/hardware/detecthardware.h"
#include "gromacs/hardware/gpu_hw_info.h"
#include "gromacs/trajectory/energyframe.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/loggerbuilder.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
#include "gromacs/utility/stringutil.h"
#include "testutils/mpitest.h"
{
EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks
}
+
+ auto hardwareInfo_ = gmx_detect_hardware(MDLogger {},
+ PhysicalNodeCommunicator(MPI_COMM_WORLD,
+ gmx_physicalnode_id_hash()));
+
for (const auto &mode : runModes)
{
auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos);
// to test here.
continue;
}
+ auto modeTargetsPmeOnGpus = (mode.first.find("PmeOnGpu") != std::string::npos);
+ if (modeTargetsPmeOnGpus && !pme_gpu_supports_build(*hardwareInfo_, nullptr))
+ {
+ // This run mode will cause a fatal error from mdrun when
+ // it finds an unsuitable device, which is not something
+ // we're trying to test here.
+ continue;
+ }
runner_.edrFileName_ = fileManager_.getTemporaryFilePath(inputFile + "_" + mode.first + ".edr");