From: Mark Abraham Date: Thu, 14 Feb 2019 16:01:25 +0000 (+0100) Subject: Improved diagnostics when PME cannot run on GPUs X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=0b84efb5d85ce4d19e33b3b2aa05c7aef56218be;p=alexxy%2Fgromacs.git Improved diagnostics when PME cannot run on GPUs Fixes #2789, #2823 Change-Id: Iff7609a3774c11329d36bd8557b741a4d29f7c5a --- diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp index fab7970464..d687e855cb 100644 --- a/src/gromacs/ewald/pme.cpp +++ b/src/gromacs/ewald/pme.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -133,33 +133,48 @@ static bool addMessageIfNotSupported(const std::list &errorReasons, std::string *error) { - bool foundErrorReasons = errorReasons.empty(); - if (!foundErrorReasons && error) + bool isSupported = errorReasons.empty(); + if (!isSupported && error) { std::string regressionTestMarker = "PME GPU does not support"; // this prefix is tested for in the regression tests script gmxtest.pl - *error = regressionTestMarker + ": " + gmx::joinStrings(errorReasons, "; ") + "."; + *error = regressionTestMarker; + if (errorReasons.size() == 1) + { + *error += " " + errorReasons.back(); + } + else + { + *error += ": " + gmx::joinStrings(errorReasons, "; "); + } + *error += "."; } - return foundErrorReasons; + return isSupported; } -bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo, - std::string *error) +bool pme_gpu_supports_build(std::string *error) { std::list errorReasons; if (GMX_DOUBLE) { - errorReasons.emplace_back("double precision"); + errorReasons.emplace_back("a double-precision build"); } if (GMX_GPU == GMX_GPU_NONE) { - errorReasons.emplace_back("non-GPU build of GROMACS"); + errorReasons.emplace_back("a non-GPU build"); } + return addMessageIfNotSupported(errorReasons, error); +} + +bool pme_gpu_supports_hardware(const gmx_hw_info_t &hwinfo, + std::string *error) +{ + std::list errorReasons; if (GMX_GPU == GMX_GPU_OPENCL) { if (!areAllGpuDevicesFromAmd(hwinfo.gpu_info)) { - errorReasons.emplace_back("only AMD devices are supported"); + errorReasons.emplace_back("non-AMD devices"); } } return addMessageIfNotSupported(errorReasons, error); diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h index 5a4956a8b7..f4dffeed6a 100644 --- a/src/gromacs/ewald/pme.h +++ b/src/gromacs/ewald/pme.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -251,14 +251,22 @@ void gmx_pme_reinit_atoms(const gmx_pme_t *pme, int nAtoms, const real *charges) * TODO: this partly duplicates an internal PME assert function * pme_gpu_check_restrictions(), except that works with a * formed gmx_pme_t structure. Should that one go away/work with inputrec? + * + * \param[out] error If non-null, the error message when PME is not supported on GPU. + * + * \returns true if PME can run on GPU on this build, false otherwise. + */ +bool pme_gpu_supports_build(std::string *error); + +/*! \brief Checks whether the detected (GPU) hardware allows to run PME on GPU. * * \param[in] hwinfo Information about the detected hardware * \param[out] error If non-null, the error message when PME is not supported on GPU. * * \returns true if PME can run on GPU on this build, false otherwise. */ -bool pme_gpu_supports_build(const gmx_hw_info_t &hwinfo, - std::string *error); +bool pme_gpu_supports_hardware(const gmx_hw_info_t &hwinfo, + std::string *error); /*! \brief Checks whether the input system allows to run PME on GPU. * TODO: this partly duplicates an internal PME assert function diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp index 6fa0627777..7475de602e 100644 --- a/src/gromacs/ewald/tests/pmetestcommon.cpp +++ b/src/gromacs/ewald/tests/pmetestcommon.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -85,7 +85,8 @@ bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo, break; case CodePath::GPU: - implemented = (pme_gpu_supports_build(hwinfo, nullptr) && + implemented = (pme_gpu_supports_build(nullptr) && + pme_gpu_supports_hardware(hwinfo, nullptr) && pme_gpu_supports_input(*inputRec, mtop, nullptr)); break; diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.cpp b/src/gromacs/ewald/tests/testhardwarecontexts.cpp index b4486fc7ca..5cbbcc3119 100644 --- a/src/gromacs/ewald/tests/testhardwarecontexts.cpp +++ b/src/gromacs/ewald/tests/testhardwarecontexts.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2017,2018, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -112,7 +112,8 @@ void PmeTestEnvironment::SetUp() hardwareContexts_.emplace_back(compat::make_unique(CodePath::CPU, "", nullptr)); hardwareInfo_ = hardwareInit(); - if (!pme_gpu_supports_build(*hardwareInfo_, nullptr)) + if (!pme_gpu_supports_build(nullptr) || + !pme_gpu_supports_hardware(*hardwareInfo_, nullptr)) { // PME can only run on the CPU, so don't make any more test contexts. return; diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 110c9e3c37..ae158a4984 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2004, The GROMACS development team. - * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -621,10 +621,9 @@ int Mdrunner::mdrunner() inputrec->cutoff_scheme == ecutsVERLET, gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI), hw_opt.nthreads_tmpi); - auto canUseGpuForPme = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr); useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, - canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks); + *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; @@ -692,9 +691,9 @@ int Mdrunner::mdrunner() usingVerletScheme, gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI), gpusWereDetected); - auto canUseGpuForPme = pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, mtop, nullptr); useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, - canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks, + *hwinfo, *inputrec, mtop, + cr->nnodes, domdecOptions.numPmeRanks, gpusWereDetected); auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr); useGpuForBonded = diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp index 8e74f9b087..b1f2a547df 100644 --- a/src/gromacs/taskassignment/decidegpuusage.cpp +++ b/src/gromacs/taskassignment/decidegpuusage.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -51,6 +51,7 @@ #include #include +#include "gromacs/ewald/pme.h" #include "gromacs/hardware/cpuinfo.h" #include "gromacs/hardware/detecthardware.h" #include "gromacs/hardware/hardwaretopology.h" @@ -153,14 +154,18 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbo const TaskTarget pmeTarget, const std::vector &gpuIdsToUse, const std::vector &userGpuTaskAssignment, - const bool canUseGpuForPme, + const gmx_hw_info_t &hardwareInfo, + const t_inputrec &inputrec, + const gmx_mtop_t &mtop, const int numRanksPerSimulation, const int numPmeRanksPerSimulation) { // First, exclude all cases where we can't run PME on GPUs. if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || - !canUseGpuForPme) + !pme_gpu_supports_build(nullptr) || + !pme_gpu_supports_hardware(hardwareInfo, nullptr) || + !pme_gpu_supports_input(inputrec, mtop, nullptr)) { // PME can't run on a GPU. If the user required that, we issue // an error later. @@ -335,7 +340,9 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarg bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded, const TaskTarget pmeTarget, const std::vector &userGpuTaskAssignment, - const bool canUseGpuForPme, + const gmx_hw_info_t &hardwareInfo, + const t_inputrec &inputrec, + const gmx_mtop_t &mtop, const int numRanksPerSimulation, const int numPmeRanksPerSimulation, const bool gpusWereDetected) @@ -350,18 +357,36 @@ bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded, if (pmeTarget == TaskTarget::Gpu) { GMX_THROW(NotImplementedError - ("The PME on the GPU is only supported when nonbonded interactions run on GPUs also.")); + ("PME on GPUs is only supported when nonbonded interactions run on GPUs also.")); } return false; } - if (!canUseGpuForPme) + std::string message; + if (!pme_gpu_supports_build(&message)) { if (pmeTarget == TaskTarget::Gpu) { - // TODO Pass in the inputrec so we can give more help here? GMX_THROW(NotImplementedError - ("The input simulation did not use PME in a way that is supported on the GPU.")); + ("Cannot compute PME interactions on a GPU, because " + message)); + } + return false; + } + if (!pme_gpu_supports_hardware(hardwareInfo, &message)) + { + if (pmeTarget == TaskTarget::Gpu) + { + GMX_THROW(NotImplementedError + ("Cannot compute PME interactions on a GPU, because " + message)); + } + return false; + } + if (!pme_gpu_supports_input(inputrec, mtop, &message)) + { + if (pmeTarget == TaskTarget::Gpu) + { + GMX_THROW(NotImplementedError + ("Cannot compute PME interactions on a GPU, because " + message)); } return false; } @@ -383,7 +408,7 @@ bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded, // Specifying -gputasks requires specifying everything. if (pmeTarget == TaskTarget::Auto) { - GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); + GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi? } return true; diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h index 34e61ffa92..02dcac5512 100644 --- a/src/gromacs/taskassignment/decidegpuusage.h +++ b/src/gromacs/taskassignment/decidegpuusage.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2017,2018, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -46,6 +46,8 @@ #include struct gmx_hw_info_t; +struct gmx_mtop_t; +struct t_inputrec; enum class EmulateGpuNonbonded : bool; @@ -102,7 +104,9 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(TaskTarget non * \param[in] pmeTarget The user's choice for mdrun -pme for where to assign long-ranged PME nonbonded interaction tasks. * \param[in] gpuIdsToUse The compatible GPUs that the user permitted us to use. * \param[in] userGpuTaskAssignment The user-specified assignment of GPU tasks to device IDs. - * \param[in] canUseGpuForPme Whether the form of PME chosen can run on a GPU + * \param[in] hardwareInfo Hardware information + * \param[in] inputrec The user input + * \param[in] mtop Global system topology * \param[in] numRanksPerSimulation The number of ranks in each simulation. * \param[in] numPmeRanksPerSimulation The number of PME ranks in each simulation. * @@ -114,7 +118,9 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(bool useGpuFor TaskTarget pmeTarget, const std::vector &gpuIdsToUse, const std::vector &userGpuTaskAssignment, - bool canUseGpuForPme, + const gmx_hw_info_t &hardwareInfo, + const t_inputrec &inputrec, + const gmx_mtop_t &mtop, int numRanksPerSimulation, int numPmeRanksPerSimulation); @@ -173,7 +179,9 @@ bool decideWhetherToUseGpusForNonbonded(TaskTarget nonbondedTarget, * \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions. * \param[in] pmeTarget The user's choice for mdrun -pme for where to assign long-ranged PME nonbonded interaction tasks. * \param[in] userGpuTaskAssignment The user-specified assignment of GPU tasks to device IDs. - * \param[in] canUseGpuForPme Whether the form of PME chosen can run on a GPU + * \param[in] hardwareInfo Hardware information + * \param[in] inputrec The user input + * \param[in] mtop Global system topology * \param[in] numRanksPerSimulation The number of ranks in each simulation. * \param[in] numPmeRanksPerSimulation The number of PME ranks in each simulation. * \param[in] gpusWereDetected Whether compatible GPUs were detected on any node. @@ -185,7 +193,9 @@ bool decideWhetherToUseGpusForNonbonded(TaskTarget nonbondedTarget, bool decideWhetherToUseGpusForPme(bool useGpuForNonbonded, TaskTarget pmeTarget, const std::vector &userGpuTaskAssignment, - bool canUseGpuForPme, + const gmx_hw_info_t &hardwareInfo, + const t_inputrec &inputrec, + const gmx_mtop_t &mtop, int numRanksPerSimulation, int numPmeRanksPerSimulation, bool gpusWereDetected); diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp index 5fa35b34eb..09d02464a6 100644 --- a/src/gromacs/taskassignment/resourcedivision.cpp +++ b/src/gromacs/taskassignment/resourcedivision.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2015,2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -356,7 +356,9 @@ int get_nthreads_mpi(const gmx_hw_info_t *hwinfo, if (pmeOnGpu) { GMX_RELEASE_ASSERT((EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) && - pme_gpu_supports_build(*hwinfo, nullptr) && pme_gpu_supports_input(*inputrec, *mtop, nullptr), + pme_gpu_supports_build(nullptr) && + pme_gpu_supports_hardware(*hwinfo, nullptr) && + pme_gpu_supports_input(*inputrec, *mtop, nullptr), "PME can't be on GPUs unless we are using PME"); // PME on GPUs supports a single PME rank with PP running on the same or few other ranks. diff --git a/src/programs/mdrun/tests/pmetest.cpp b/src/programs/mdrun/tests/pmetest.cpp index c2c734c53d..960ccbf276 100644 --- a/src/programs/mdrun/tests/pmetest.cpp +++ b/src/programs/mdrun/tests/pmetest.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -147,7 +147,9 @@ void PmeTest::runTest(const RunModesList &runModes) continue; } auto modeTargetsPmeOnGpus = (mode.first.find("PmeOnGpu") != std::string::npos); - if (modeTargetsPmeOnGpus && !pme_gpu_supports_build(*hardwareInfo_, nullptr)) + if (modeTargetsPmeOnGpus && + !(pme_gpu_supports_build(nullptr) && + pme_gpu_supports_hardware(*hardwareInfo_, nullptr))) { // This run mode will cause a fatal error from mdrun when // it finds an unsuitable device, which is not something