const std::vector<int> &gpuIdsToUse,
const std::vector<int> &userGpuTaskAssignment,
const bool canUseGpuForPme,
- const int numRanksPerSimulation)
+ const int numRanksPerSimulation,
+ const int numPmeRanksPerSimulation)
{
// First, exclude all cases where we can't run PME on GPUs.
if ((pmeTarget == TaskTarget::Cpu) ||
// PME on GPUs is only supported in a single case
if (pmeTarget == TaskTarget::Gpu)
{
- if (numRanksPerSimulation > 1)
+ if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
+ (numPmeRanksPerSimulation > 1))
{
GMX_THROW(InconsistentInputError
- ("When you run mdrun -pme gpu -gputasks, you must supply a PME .tpr file and use a single rank."));
+ ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
}
return true;
}
if (pmeTarget == TaskTarget::Gpu)
{
- if (numRanksPerSimulation > 1)
+ if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
+ (numPmeRanksPerSimulation > 1))
{
GMX_THROW(NotImplementedError
("PME tasks were required to run on GPUs, but that is not implemented with "
- "more than one rank. Use a single rank, or permit PME tasks to be assigned "
- "to the CPU."));
+ "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
+ "or permit PME tasks to be assigned to the CPU."));
}
return true;
}
const TaskTarget pmeTarget,
const std::vector<int> &userGpuTaskAssignment,
const bool canUseGpuForPme,
- const int numRanksPerSimulation)
+ const int numRanksPerSimulation,
+ const int numPmeRanksPerSimulation)
{
if (pmeTarget == TaskTarget::Cpu)
{
if (pmeTarget == TaskTarget::Gpu)
{
- if (numRanksPerSimulation > 1)
+ if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
+ (numPmeRanksPerSimulation > 1))
{
GMX_THROW(NotImplementedError
("PME tasks were required to run on GPUs, but that is not implemented with "
- "more than one rank. Use a single rank, or permit PME tasks to be assigned "
- "to the CPU."));
+ "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
+ "or permit PME tasks to be assigned to the CPU."));
}
return true;
}
* \param[in] userGpuTaskAssignment The user-specified assignment of GPU tasks to device IDs.
* \param[in] canUseGpuForPme Whether the form of PME chosen can run on a GPU
* \param[in] numRanksPerSimulation The number of ranks in each simulation.
+ * \param[in] numPmeRanksPerSimulation The number of PME ranks in each simulation.
*
* \returns Whether the simulation will run PME tasks on GPUs.
*
const std::vector<int> &gpuIdsToUse,
const std::vector<int> &userGpuTaskAssignment,
const bool canUseGpuForPme,
- const int numRanksPerSimulation);
+ const int numRanksPerSimulation,
+ const int numPmeRanksPerSimulation);
/*! \brief Decide whether the simulation will try to run nonbonded
* tasks on GPUs.
* \param[in] userGpuTaskAssignment The user-specified assignment of GPU tasks to device IDs.
* \param[in] canUseGpuForPme Whether the form of PME chosen can run on a GPU
* \param[in] numRanksPerSimulation The number of ranks in each simulation.
+ * \param[in] numPmeRanksPerSimulation The number of PME ranks in each simulation.
*
* \returns Whether the simulation will run nonbonded and PME tasks, respectively, on GPUs.
*
const TaskTarget pmeTarget,
const std::vector<int> &userGpuTaskAssignment,
const bool canUseGpuForPme,
- const int numRanksPerSimulation);
+ const int numRanksPerSimulation,
+ const int numPmeRanksPerSimulation);
}
pme_gpu_supports_input(inputrec, nullptr),
"PME can't be on GPUs unless we are using PME");
- // A single rank is all that is supported with PME on GPUs
+ // PME on GPUs supports a single PME rank with PP running on the same or few other ranks.
+ // For now, let's treat separate PME GPU rank as opt-in.
if (hw_opt->nthreads_tmpi < 1)
{
return 1;
}
- if (hw_opt->nthreads_tmpi > 1)
- {
- gmx_fatal(FARGS, "PME on GPUs is only supported with a single rank");
- }
}
{
auto canUseGpuForPme = inputSystemHasPme && pme_gpu_supports_input(inputrec, nullptr);
useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
(useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
- canUseGpuForPme, hw_opt.nthreads_tmpi);
+ canUseGpuForPme, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
/* Determine how many thread-MPI ranks to start.
gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, doRerun));
auto inputSystemHasPme = EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype);
auto canUseGpuForPme = inputSystemHasPme && pme_gpu_supports_input(inputrec, nullptr);
- useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, canUseGpuForPme, cr->nnodes);
+ useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, canUseGpuForPme, cr->nnodes, domdecOptions.numPmeRanks);
pmeRunMode = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
if ((pmeRunMode == PmeRunMode::GPU) && (pmeFftTarget == TaskTarget::Cpu))
{
if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
{
- /* With GPUs we don't automatically use PME-only ranks. PME ranks can
+ /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
* improve performance with many threads per GPU, since our OpenMP
* scaling is bad, but it's difficult to automate the setup.
*/
- domdecOptions.numPmeRanks = 0;
+ if (useGpuForPme && (hw_opt.nthreads_tmpi > 1))
+ {
+ domdecOptions.numPmeRanks = 1;
+ //TODO print appropriate notice on why asking for multiple threads and -pme gpu causes a separate PME rank to start
+ }
+ else
+ {
+ domdecOptions.numPmeRanks = 0;
+ }
}
if (useGpuForPme)
{
if (domdecOptions.numPmeRanks < 0)
{
- domdecOptions.numPmeRanks = 0; // separate GPU ranks not supported
+ domdecOptions.numPmeRanks = 0;
+ // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
}
else
{
- GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks == 0, "Separate PME GPU ranks are not yet supported");
+ GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
}
}
if (pmeGpuTaskMapping != gpuTaskAssignment.end())
{
pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_);
+ init_gpu(mdlog, pmeDeviceInfo);
}
/* getting number of PP/PME threads
{
try
{
- if (pmeDeviceInfo != nullptr && pmeDeviceInfo != nonbondedDeviceInfo)
- {
- GMX_THROW(NotImplementedError
- ("PME on a GPU can run only on the same GPU as nonbonded, because "
- "context switching is not yet supported."));
- }
pmedata = gmx_pme_init(cr, npme_major, npme_minor, inputrec,
mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,
mdrunOptions.reproducible,
# make an "object library" for code that we re-use for both kinds of tests
add_library(mdrun_test_objlib OBJECT
+ energyreader.cpp
mdruncomparisonfixture.cpp
moduletest.cpp
terminationhelper.cpp
+ # PME tests
+ pmetest.cpp
)
set(testname "MdrunTests")
${exename}
# files with code for tests
tabulated_bonded_interactions.cpp
- energyreader.cpp
grompp.cpp
initialconstraints.cpp
- pmetest.cpp
rerun.cpp
trajectory_writing.cpp
trajectoryreader.cpp
*/
/*! \internal \file
* \brief
- * This implements basic PME sanity test (using single-rank mdrun).
+ * This implements basic PME sanity tests.
* It runs the input system with PME for several steps (on CPU and GPU, if available),
* and checks the reciprocal and conserved energies.
- * TODO: implement multi-rank tests as well.
+ * As part of mdrun-test, this will always run single rank PME simulation.
+ * As part of mdrun-mpi-test, this will run same as above when a single rank is requested,
+ * or a simulation with a single separate PME rank ("-npme 1") when multiple ranks are requested.
+ * \todo Extend and generalize this for more multi-rank tests (-npme 0, -npme 2, etc).
+ * \todo Implement death tests (e.g. for PME GPU decomposition).
*
* \author Aleksei Iupinov <a.yupinov@gmail.com>
* \ingroup module_mdrun_integration_tests
#include <string>
#include <vector>
+#include <gtest/gtest-spi.h>
+
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/hardware/gpu_hw_info.h"
#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/stringutil.h"
+#include "testutils/mpitest.h"
#include "testutils/refdata.h"
#include "energyreader.h"
const std::string inputFile = "spc-and-methanol";
runner_.useTopGroAndNdxFromDatabase(inputFile.c_str());
+ // With single rank we can and will always test PP+PME as part of mdrun-test.
+ // With multiple ranks we can additionally test a single PME-only rank within mdrun-mpi-test.
+ const bool parallelRun = (getNumberOfTestMpiRanks() > 1);
+ const bool useSeparatePme = parallelRun;
+
EXPECT_EQ(0, runner_.callGrompp());
//TODO test all proper/improper combinations in more thorough way?
runModes["PmeOnGpuFftAuto"] = {"-pme", "gpu", "-pmefft", "auto"};
TestReferenceData refData;
TestReferenceChecker rootChecker(refData.rootChecker());
-
+ const bool thisRankChecks = (gmx_node_rank() == 0);
+ if (!thisRankChecks)
+ {
+ EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks
+ }
for (const auto &mode : runModes)
{
auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos);
CommandLine commandLine(mode.second);
commandLine.append("-notunepme"); // for reciprocal energy reproducibility
+ if (useSeparatePme)
+ {
+ commandLine.addOption("-npme", 1);
+ }
ASSERT_EQ(0, runner_.callMdrun(commandLine));
- auto energyReader = openEnergyFileToReadFields(runner_.edrFileName_, {"Coul. recip.", "Total Energy", "Kinetic En."});
- auto conservedChecker = rootChecker.checkCompound("Energy", "Conserved");
- auto reciprocalChecker = rootChecker.checkCompound("Energy", "Reciprocal");
- for (int i = 0; i <= nsteps; i++)
+ if (thisRankChecks)
{
- EnergyFrame frame = energyReader->frame();
- std::string stepNum = gmx::formatString("%d", i);
- const real conservedEnergy = frame.at("Total Energy");
- const real reciprocalEnergy = frame.at("Coul. recip.");
- if (i == 0)
+ auto energyReader = openEnergyFileToReadFields(runner_.edrFileName_, {"Coul. recip.", "Total Energy", "Kinetic En."});
+ auto conservedChecker = rootChecker.checkCompound("Energy", "Conserved");
+ auto reciprocalChecker = rootChecker.checkCompound("Energy", "Reciprocal");
+ for (int i = 0; i <= nsteps; i++)
{
- // use first step values as references for tolerance
- const real startingKineticEnergy = frame.at("Kinetic En.");
- const auto conservedTolerance = relativeToleranceAsFloatingPoint(startingKineticEnergy, 2e-5);
- const auto reciprocalTolerance = relativeToleranceAsFloatingPoint(reciprocalEnergy, 2e-5);
- reciprocalChecker.setDefaultTolerance(reciprocalTolerance);
- conservedChecker.setDefaultTolerance(conservedTolerance);
+ EnergyFrame frame = energyReader->frame();
+ std::string stepNum = gmx::formatString("%d", i);
+ const real conservedEnergy = frame.at("Total Energy");
+ const real reciprocalEnergy = frame.at("Coul. recip.");
+ if (i == 0)
+ {
+ // use first step values as references for tolerance
+ const real startingKineticEnergy = frame.at("Kinetic En.");
+ const auto conservedTolerance = relativeToleranceAsFloatingPoint(startingKineticEnergy, 2e-5);
+ const auto reciprocalTolerance = relativeToleranceAsFloatingPoint(reciprocalEnergy, 3e-5);
+ reciprocalChecker.setDefaultTolerance(reciprocalTolerance);
+ conservedChecker.setDefaultTolerance(conservedTolerance);
+ }
+ conservedChecker.checkReal(conservedEnergy, stepNum.c_str());
+ reciprocalChecker.checkReal(reciprocalEnergy, stepNum.c_str());
}
- conservedChecker.checkReal(conservedEnergy, stepNum.c_str());
- reciprocalChecker.checkReal(reciprocalEnergy, stepNum.c_str());
}
+ // FIXME: without this barrier, one of the mdruns was somehow having a non-PME inputrec (!)
+#if GMX_LIB_MPI
+ if (parallelRun)
+ {
+ MPI_Barrier(MPI_COMM_WORLD);
+ }
+#endif
+ }
+
+ // This is a workaround for the output files to not be deleted in a parallel run.
+ // TODO: consider moving the barrier into the file manager destructor.
+#if GMX_LIB_MPI
+ if (parallelRun)
+ {
+ MPI_Barrier(MPI_COMM_WORLD);
}
+#endif
}
}