From 39566cbfcc728d30750c534d1f12917ff35aafc3 Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Tue, 10 Sep 2019 11:01:19 +0200 Subject: [PATCH] Enable TPI with the Verlet cut-off scheme This requires a special treatment for the nbnxm gridding and search, with one grid for the system to insert in and one grid for the molecule to insert. Also reenabled the TPI tests. Change-Id: Ia3888daf4b06a94b0a7a03ec17b7bbe27084ac3d --- docs/user-guide/mdp-options.rst | 10 +- src/gromacs/gmxpreprocess/readir.cpp | 10 +- src/gromacs/mdlib/forcerec.cpp | 11 +- src/gromacs/mdlib/sim_util.cpp | 19 +- src/gromacs/mdrun/runner.cpp | 58 ++++-- src/gromacs/mdrun/tpi.cpp | 194 +++++++++++++----- src/gromacs/nbnxm/grid.cpp | 32 ++- src/gromacs/nbnxm/grid.h | 4 +- src/gromacs/nbnxm/gridset.cpp | 50 +++-- src/gromacs/nbnxm/gridset.h | 14 +- src/gromacs/nbnxm/nbnxm_setup.cpp | 1 + src/gromacs/nbnxm/pairlist.cpp | 5 + src/gromacs/nbnxm/pairsearch.cpp | 3 +- src/gromacs/nbnxm/pairsearch.h | 1 + .../taskassignment/resourcedivision.cpp | 27 ++- src/gromacs/taskassignment/resourcedivision.h | 7 +- src/programs/mdrun/tests/CMakeLists.txt | 15 +- src/programs/mdrun/tests/tpitest.cpp | 4 +- 18 files changed, 336 insertions(+), 129 deletions(-) diff --git a/docs/user-guide/mdp-options.rst b/docs/user-guide/mdp-options.rst index 711c877128..1f76fd1972 100644 --- a/docs/user-guide/mdp-options.rst +++ b/docs/user-guide/mdp-options.rst @@ -563,10 +563,12 @@ Neighbor searching .. mdp:: rlist (1) [nm] - Cut-off distance for the short-range neighbor list. With the - :mdp-value:`cutoff-scheme=Verlet` :mdp:`cutoff-scheme`, this is by default set by the - :mdp:`verlet-buffer-tolerance` option and the value of - :mdp:`rlist` is ignored. + Cut-off distance for the short-range neighbor list. With dynamics, + this is by default set by the :mdp:`verlet-buffer-tolerance` option + and the value of :mdp:`rlist` is ignored. Without dynamics, this + is by default set to the maximum cut-off plus 5% buffer, except + for test particle insertion, where the buffer is managed exactly + and automatically. 
Electrostatics diff --git a/src/gromacs/gmxpreprocess/readir.cpp b/src/gromacs/gmxpreprocess/readir.cpp index 8f92c0cacd..c0623751e8 100644 --- a/src/gromacs/gmxpreprocess/readir.cpp +++ b/src/gromacs/gmxpreprocess/readir.cpp @@ -368,7 +368,13 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, rc_max = std::max(ir->rvdw, ir->rcoulomb); - if (ir->verletbuf_tol <= 0) + if (EI_TPI(ir->eI)) + { + /* With TPI we set the pairlist cut-off later using the radius of the inserted molecule */ + ir->verletbuf_tol = 0; + ir->rlist = rc_max; + } + else if (ir->verletbuf_tol <= 0) { if (ir->verletbuf_tol == 0) { @@ -546,8 +552,6 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, CHECK(ir->nstlist <= 0); sprintf(err_buf, "TPI does not work with full electrostatics other than PME"); CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype)); - sprintf(err_buf, "TPI does not work (yet) with the Verlet cut-off scheme"); - CHECK(ir->cutoff_scheme == ecutsVERLET); } /* SHAKE / LINCS */ diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index 8230129f2b..0b478cf110 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -1475,7 +1475,6 @@ void init_forcerec(FILE *fp, real rtab; char *env; double dbl; - const t_block *cgs; gmx_bool bGenericKernelOnly; gmx_bool bFEP_NonBonded; @@ -1491,16 +1490,8 @@ void init_forcerec(FILE *fp, if (EI_TPI(ir->eI)) { /* Set to the size of the molecule to be inserted (the last one) */ - /* Because of old style topologies, we have to use the last cg - * instead of the last molecule type. - */ - cgs = &mtop->moltype[mtop->molblock.back().type].cgs; - fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1]; gmx::RangePartitioning molecules = gmx_mtop_molecules(*mtop); - if (fr->n_tpi != molecules.block(molecules.numBlocks() - 1).size()) - { - gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group."); - } + fr->n_tpi = molecules.block(molecules.numBlocks() - 1).size(); } else { diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 768ca45777..945165cdc9 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -1118,7 +1118,7 @@ void do_force(FILE *fplog, nbv->atomdata_init_add_nbat_f_to_f_gpu(); } } - else + else if (!EI_TPI(inputrec->eI)) { if (useGpuXBufOps == BufferOpsUseGpu::True) { @@ -1376,13 +1376,16 @@ void do_force(FILE *fplog, step, nrnb, wcycle); } - /* Add all the non-bonded force to the normal force array. - * This can be split into a local and a non-local part when overlapping - * communication with calculation with domain decomposition. - */ - wallcycle_stop(wcycle, ewcFORCE); - nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force()); - wallcycle_start_nocount(wcycle, ewcFORCE); + if (forceFlags.computeForces) + { + /* Add all the non-bonded force to the normal force array. + * This can be split into a local and a non-local part when overlapping + * communication with calculation with domain decomposition.
+ */ + wallcycle_stop(wcycle, ewcFORCE); + nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force()); + wallcycle_start_nocount(wcycle, ewcFORCE); + } /* If there are multiple fshift output buffers we need to reduce them */ if (forceFlags.computeVirial) diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 64248d311d..5caa3caee5 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -394,24 +394,36 @@ namespace gmx * falling back to CPU code. With thread-MPI, only the first * call to this function should have \c issueWarning true. */ static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger &mdlog, - const t_inputrec *ir, + const t_inputrec &ir, bool issueWarning) { - if (ir->opts.ngener - ir->nwall > 1) + bool gpuIsUseful = true; + std::string warning; + + if (ir.opts.ngener - ir.nwall > 1) { /* The GPU code does not support more than one energy group. * If the user requested GPUs explicitly, a fatal error is given later. */ - if (issueWarning) - { - GMX_LOG(mdlog.warning).asParagraph() - .appendText("Multiple energy groups is not implemented for GPUs, falling back to the CPU. " - "For better performance, run on the GPU without energy groups and then do " - "gmx mdrun -rerun option on the trajectory with an energy group .tpr file."); - } - return false; + gpuIsUseful = false; + warning = + "Multiple energy groups is not implemented for GPUs, falling back to the CPU. " + "For better performance, run on the GPU without energy groups and then do " + "gmx mdrun -rerun option on the trajectory with an energy group .tpr file."; } - return true; + + if (EI_TPI(ir.eI)) + { + gpuIsUseful = false; + warning = "TPI is not implemented for GPUs."; + } + + if (!gpuIsUseful && issueWarning) + { + GMX_LOG(mdlog.warning).asParagraph().appendText(warning); + } + + return gpuIsUseful; } //! Initializes the logger for mdrun. 
@@ -605,8 +617,6 @@ int Mdrunner::mdrunner() /* CAUTION: threads may be started later on in this function, so cr doesn't reflect the final parallel state right now */ - t_inputrec inputrecInstance; - t_inputrec *inputrec = &inputrecInstance; gmx_mtop_t mtop; bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data()); @@ -659,6 +669,9 @@ int Mdrunner::mdrunner() // Print citation requests after all software/hardware printing pleaseCiteGromacs(fplog); + // TODO Replace this by unique_ptr once t_inputrec is C++ + t_inputrec inputrecInstance; + t_inputrec *inputrec = nullptr; std::unique_ptr<t_state> globalState; if (SIMMASTER(cr)) { @@ -667,11 +680,13 @@ globalState = std::make_unique<t_state>(); /* Read (nearly) all data required for the simulation */ - read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), inputrec, globalState.get(), &mtop); + read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), + &inputrecInstance, globalState.get(), &mtop); + inputrec = &inputrecInstance; } /* Check and update the hardware options for internal consistency */ - checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks); + checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks, inputrec); if (GMX_THREAD_MPI && SIMMASTER(cr)) { @@ -679,6 +694,8 @@ bool useGpuForPme = false; try { + GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy"); + // If the user specified the number of ranks, then we must // respect that, but in default mode, we need to allow for // the number of GPUs to choose the number of ranks. @@ -686,7 +703,7 @@ int Mdrunner::mdrunner() useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded, - gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI), + gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI), hw_opt.nthreads_tmpi); useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, @@ -723,8 +740,13 @@ int Mdrunner::mdrunner() if (PAR(cr)) { /* now broadcast everything to the non-master nodes/threads: */ + if (!SIMMASTER(cr)) + { + inputrec = &inputrecInstance; + } init_parallel(cr, inputrec, &mtop); } + GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now"); // Now each rank knows the inputrec that SIMMASTER read and used, // and (if applicable) cr->nnodes has been assigned the number of @@ -754,7 +776,7 @@ int Mdrunner::mdrunner() useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded, - gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI), + gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected); useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec, mtop, @@ -1044,7 +1066,7 @@ int Mdrunner::mdrunner() &hw_opt, hwinfo->nthreads_hw_avail, FALSE); /* Check and update the number of OpenMP threads requested */ checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_, - pmeRunMode, mtop); + pmeRunMode, mtop, *inputrec); gmx_omp_nthreads_init(mdlog, cr, hwinfo->nthreads_hw_avail, diff --git a/src/gromacs/mdrun/tpi.cpp b/src/gromacs/mdrun/tpi.cpp index 382a32be2f..3afadf161f 100644 ---
a/src/gromacs/mdrun/tpi.cpp +++ b/src/gromacs/mdrun/tpi.cpp @@ -70,6 +70,7 @@ #include "gromacs/mdlib/energyoutput.h" #include "gromacs/mdlib/force.h" #include "gromacs/mdlib/force_flags.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" #include "gromacs/mdlib/mdatoms.h" #include "gromacs/mdlib/tgroup.h" #include "gromacs/mdlib/update.h" @@ -82,6 +83,7 @@ #include "gromacs/mdtypes/md_enums.h" #include "gromacs/mdtypes/mdrunoptions.h" #include "gromacs/mdtypes/state.h" +#include "gromacs/nbnxm/nbnxm.h" #include "gromacs/pbcutil/pbc.h" #include "gromacs/random/threefry.h" #include "gromacs/random/uniformrealdistribution.h" @@ -129,6 +131,31 @@ static void realloc_bins(double **bin, int *nbin, int nbin_new) } } +//! Computes and returns the RF exclusion energy for the last molecule starting at \p beginAtom +static real +reactionFieldExclusionCorrection(gmx::ArrayRef<const gmx::RVec> x, + const t_mdatoms &mdatoms, + const interaction_const_t &ic, + const int beginAtom) +{ + real energy = 0; + + for (int i = beginAtom; i < mdatoms.homenr; i++) + { + const real qi = mdatoms.chargeA[i]; + energy -= 0.5*qi*qi*ic.c_rf; + + for (int j = i + 1; j < mdatoms.homenr; j++) + { + const real qj = mdatoms.chargeA[j]; + const real rsq = distance2(x[i], x[j]); + energy += qi*qj*(ic.k_rf*rsq - ic.c_rf); + } + } + + return ic.epsfac*energy; +} + namespace gmx { @@ -136,6 +163,8 @@ namespace gmx void LegacySimulator::do_tpi() { + GMX_RELEASE_ASSERT(gmx_omp_nthreads_get(emntDefault) == 1, "TPI does not support OpenMP"); + gmx_localtop_t top; PaddedVector<gmx::RVec> f {}; real lambda, t, temp, beta, drmax, epot; @@ -146,7 +175,7 @@ tensor force_vir, shake_vir, vir, pres; int a_tp0, a_tp1, ngid, gid_tp, nener, e; rvec *x_mol; - rvec mu_tot, x_init, dx, x_tp; + rvec mu_tot, x_init, dx; int nnodes, frame; int64_t frame_step_prev, frame_step; int64_t nsteps, stepblocksize = 0, step; @@ -177,11 +206,6 @@ real bU_bin_limit = 50; real bU_logV_bin_limit = bU_bin_limit + 10; - if (inputrec->cutoff_scheme == ecutsVERLET) - { - gmx_fatal(FARGS, "TPI does not work (yet) with the Verlet cut-off scheme"); - } - nnodes = cr->nnodes; gmx_mtop_generate_local_top(*top_global, &top, inputrec->efep != efepNO); @@ -279,13 +303,31 @@ fprintf(debug, "TPI atoms %d-%d\n", a_tp0, a_tp1); } - GMX_RELEASE_ASSERT(inputrec->rcoulomb <= inputrec->rlist && inputrec->rvdw <= inputrec->rlist, "Twin-range interactions are not supported with TPI"); + auto x = makeArrayRef(state_global->x); + + if (EEL_PME(fr->ic->eeltype)) + { + gmx_pme_reinit_atoms(fr->pmedata, a_tp0, nullptr); + } + + /* With reaction-field we have distance-dependent potentials + * between excluded atoms, so we need to add these separately + * for the inserted molecule.
+ */ + real rfExclusionEnergy = 0; + if (EEL_RF(fr->ic->eeltype)) + { + rfExclusionEnergy = reactionFieldExclusionCorrection(x, *mdatoms, *fr->ic, a_tp0); + if (debug) + { + fprintf(debug, "RF exclusion correction for inserted molecule: %f kJ/mol\n", rfExclusionEnergy); + } + } snew(x_mol, a_tp1-a_tp0); bDispCorr = (inputrec->eDispCorr != edispcNO); bCharge = FALSE; - auto x = makeArrayRef(state_global->x); for (i = a_tp0; i < a_tp1; i++) { /* Copy the coordinates of the molecule to be insterted */ @@ -296,12 +338,24 @@ LegacySimulator::do_tpi() } bRFExcl = (bCharge && EEL_RF(fr->ic->eeltype)); - // TODO: Calculate the center of geometry of the molecule to insert -#if 0 - calc_cgcm(fplog, cg_tp, cg_tp+1, &(top.cgs), state_global->x.rvec_array(), fr->cg_cm); + // Calculate the center of geometry of the molecule to insert + rvec cog = { 0, 0, 0 }; + for (int a = a_tp0; a < a_tp1; a++) + { + rvec_inc(cog, x[a]); + } + svmul(1.0_real/(a_tp1 - a_tp0), cog, cog); + real molRadius = 0; + for (int a = a_tp0; a < a_tp1; a++) + { + molRadius = std::max(molRadius, distance2(x[a], cog)); + } + molRadius = std::sqrt(molRadius); + + const real maxCutoff = std::max(inputrec->rvdw, inputrec->rcoulomb); if (bCavity) { - if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog) + if (norm(cog) > 0.5*maxCutoff && fplog) { fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); @@ -312,10 +366,9 @@ LegacySimulator::do_tpi() /* Center the molecule to be inserted at zero */ for (i = 0; i < a_tp1-a_tp0; i++) { - rvec_dec(x_mol[i], fr->cg_cm[cg_tp]); + rvec_dec(x_mol[i], cog); } } -#endif if (fplog) { @@ -330,6 +383,8 @@ LegacySimulator::do_tpi() { if (inputrec->nstlist > 1) { + + /* With the same pair list we insert in a sphere of radius rtpi in different orientations */ if (drmax == 0 && a_tp1-a_tp0 == 1) { gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax); @@ -348,12 +403,27 @@ LegacySimulator::do_tpi() } } + /* With the same pair list we insert in a sphere of radius rtpi + * in different orientations. We generate the pairlist with all + * inserted atoms located in the center of the sphere, so we need + * a buffer of size of the sphere and molecule radius. 
+ */ + inputrec->rlist = maxCutoff + 2*inputrec->rtpi + 2*molRadius; + fr->rlist = inputrec->rlist; + fr->nbv->changePairlistRadii(inputrec->rlist, inputrec->rlist); ngid = groups->groups[SimulationAtomGroupType::EnergyOutput].size(); - // TODO: Figure out which energy group to use -#if 0 - gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]); -#endif - gid_tp = 0; + gid_tp = GET_CGINFO_GID(fr->cginfo[a_tp0]); + for (int a = a_tp0 + 1; a < a_tp1; a++) + { + if (GET_CGINFO_GID(fr->cginfo[a]) != gid_tp) + { + fprintf(fplog, + "NOTE: Atoms in the molecule to insert belong to different energy groups.\n" + " Only contributions to the group of the first atom will be reported.\n"); + break; + } + } nener = 1 + ngid; if (bDispCorr) { @@ -503,13 +573,25 @@ copy_rvec(rerun_fr.x[i], x[i]); } copy_mat(rerun_fr.box, state_global->box); + const matrix &box = state_global->box; - V = det(state_global->box); + V = det(box); logV = log(V); bStateChanged = TRUE; bNS = TRUE; + put_atoms_in_box(fr->ePBC, box, x); + + /* Put all atoms except for the inserted ones on the grid */ + rvec vzero = { 0, 0, 0 }; + rvec boxDiagonal = { box[XX][XX], box[YY][YY], box[ZZ][ZZ] }; + nbnxn_put_on_grid(fr->nbv.get(), box, + 0, vzero, boxDiagonal, + nullptr, 0, a_tp0, -1, + fr->cginfo, x, + 0, nullptr); + step = cr->nodeid*stepblocksize; while (step < nsteps) { @@ -536,24 +618,6 @@ x_init[d] = dist(rng)*state_global->box[d][d]; } } - - if (inputrec->nstlist == 1) - { - copy_rvec(x_init, x_tp); - } - else - { - /* Generate coordinates within |dx|=drmax of x_init */ - do - { - for (d = 0; d < DIM; d++) - { - dx[d] = (2*dist(rng) - 1)*drmax; - } - } - while (norm2(dx) > drmax*drmax); - rvec_add(x_init, dx, x_tp); - } } else { @@ -587,6 +651,38 @@ } } } + } + + if (bNS) + { + for (int a = a_tp0; a < a_tp1; a++) + { + x[a] = x_init; + } + + /* Put the inserted molecule on its own search grid */ + nbnxn_put_on_grid(fr->nbv.get(), box, + 1, x_init, x_init, + nullptr, a_tp0, a_tp1, -1, + fr->cginfo, x, + 0, nullptr); + + /* TODO: Avoid updating all atoms at every bNS step */ + fr->nbv->setAtomProperties(*mdatoms, fr->cginfo); + + fr->nbv->constructPairlist(Nbnxm::InteractionLocality::Local, + &top.excls, step, nrnb); + + bNS = FALSE; + } + + /* Add random displacement uniformly distributed in a sphere + * of radius rtpi. We don't need to do this if we generate + * a new center location every step. + */ + rvec x_tp; + if (bCavity || inputrec->nstlist > 1) + { /* Generate coordinates within |dx|=drmax of x_init */ do { @@ -598,6 +694,10 @@ while (norm2(dx) > drmax*drmax); rvec_add(x_init, dx, x_tp); } + else + { + copy_rvec(x_init, x_tp); + } if (a_tp1 - a_tp0 == 1) { @@ -624,18 +724,16 @@ } } + /* Note: NonLocal refers to the inserted molecule */ + fr->nbv->setCoordinates(Nbnxm::AtomLocality::NonLocal, false, + x, BufferOpsUseGpu::False, nullptr); + /* Clear some matrix variables */ clear_mat(force_vir); clear_mat(shake_vir); clear_mat(vir); clear_mat(pres); - /* Set the center of geometry mass of the test molecule */ - // TODO: Compute and set the COG -#if 0 - copy_rvec(x_init, fr->cg_cm[top.cgs.nr-1]); -#endif - /* Calc energy (no forces) on new positions.
* Since we only need the intermolecular energy * and the RF exclusion terms of the inserted molecule occur @@ -659,7 +757,6 @@ LegacySimulator::do_tpi() state_global->lambda, nullptr, fr, mdScheduleWork, nullptr, mu_tot, t, nullptr, GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY | - (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS : 0) | (bStateChanged ? GMX_FORCE_STATECHANGED : 0), DDBalanceRegionHandler(nullptr)); std::feclearexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); @@ -667,7 +764,6 @@ LegacySimulator::do_tpi() cr->nnodes = nnodes; bStateChanged = FALSE; - bNS = FALSE; if (fr->dispersionCorrection) { @@ -684,6 +780,10 @@ LegacySimulator::do_tpi() { enerd->term[F_DISPCORR] = 0; } + if (EEL_RF(fr->ic->eeltype)) + { + enerd->term[F_EPOT] += rfExclusionEnergy; + } epot = enerd->term[F_EPOT]; bEnergyOutOfBounds = FALSE; @@ -750,7 +850,7 @@ LegacySimulator::do_tpi() } if (bRFExcl) { - sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU; + sum_UgembU[e++] += rfExclusionEnergy*embU; } if (EEL_FULL(fr->ic->eeltype)) { diff --git a/src/gromacs/nbnxm/grid.cpp b/src/gromacs/nbnxm/grid.cpp index a3c4b9d59c..54b10eaf28 100644 --- a/src/gromacs/nbnxm/grid.cpp +++ b/src/gromacs/nbnxm/grid.cpp @@ -104,13 +104,32 @@ static real gridAtomDensity(int numAtoms, void Grid::setDimensions(const int ddZone, const int numAtoms, - const rvec lowerCorner, - const rvec upperCorner, + gmx::RVec lowerCorner, + gmx::RVec upperCorner, real atomDensity, const real maxAtomGroupRadius, const bool haveFep, gmx::PinningPolicy pinningPolicy) { + /* We allow passing lowerCorner=upperCorner, in which case we need to + * create a finite sized bounding box to avoid division by zero. + * We use a minimum size such that the volume fits in float with some + * margin for computing and using the atom number density. + */ + constexpr real c_minimumGridSize = 1e-10; + for (int d = 0; d < DIM; d++) + { + GMX_ASSERT(upperCorner[d] >= lowerCorner[d], "Upper corner should be larger than the lower corner"); + if (upperCorner[d] - lowerCorner[d] < c_minimumGridSize) + { + /* Ensure we apply a correction to the bounding box */ + real correction = std::max(std::abs(lowerCorner[d])*GMX_REAL_EPS, + 0.5_real*c_minimumGridSize); + lowerCorner[d] -= correction; + upperCorner[d] += correction; + } + } + /* For the home zone we compute the density when not set (=-1) or when =0 */ if (ddZone == 0 && atomDensity <= 0) { @@ -151,7 +170,7 @@ void Grid::setDimensions(const int ddZone, tlen_y = tlen*c_gpuNumClusterPerCellY; } /* We round ncx and ncy down, because we get less cell pairs - * in the nbsist when the fixed cell dimensions (x,y) are + * in the pairlist when the fixed cell dimensions (x,y) are * larger than the variable one (z) than the other way around. */ dimensions_.numCells[XX] = std::max(1, static_cast(size[XX]/tlen_x)); @@ -1409,12 +1428,9 @@ void Grid::setCellIndices(int ddZone, /* Set the cell indices for the moved particles */ int n0 = numCellsTotal_*numAtomsPerCell; int n1 = numCellsTotal_*numAtomsPerCell + cxy_na_[numColumns()]; - if (ddZone == 0) + for (int i = n0; i < n1; i++) { - for (int i = n0; i < n1; i++) - { - cells[atomIndices[i]] = i; - } + cells[atomIndices[i]] = i; } } diff --git a/src/gromacs/nbnxm/grid.h b/src/gromacs/nbnxm/grid.h index 3da68f5772..4785f4a549 100644 --- a/src/gromacs/nbnxm/grid.h +++ b/src/gromacs/nbnxm/grid.h @@ -403,8 +403,8 @@ class Grid //! 
Sets the grid dimensions void setDimensions(int ddZone, int numAtoms, - const rvec lowerCorner, - const rvec upperCorner, + gmx::RVec lowerCorner, + gmx::RVec upperCorner, real atomDensity, real maxAtomGroupRadius, bool haveFep, diff --git a/src/gromacs/nbnxm/gridset.cpp b/src/gromacs/nbnxm/gridset.cpp index 34369acf94..28a88bbeb9 100644 --- a/src/gromacs/nbnxm/gridset.cpp +++ b/src/gromacs/nbnxm/gridset.cpp @@ -54,25 +54,35 @@ namespace Nbnxm { -//! Returns the number of DD zones -static int numDDZones(const std::array &haveMultipleDomainsPerDim) +//! Returns the number of search grids +static int numGrids(const GridSet::DomainSetup &domainSetup) { - int numDDZones = 1; - for (auto haveDD : haveMultipleDomainsPerDim) + int numGrids; + if (domainSetup.doTestParticleInsertion) { - if (haveDD) + numGrids = 2; + } + else + { + numGrids = 1; + for (auto haveDD : domainSetup.haveMultipleDomainsPerDim) { - numDDZones *= 2; + if (haveDD) + { + numGrids *= 2; + } } } - return numDDZones; + return numGrids; } GridSet::DomainSetup::DomainSetup(const int ePBC, + const bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones) : ePBC(ePBC), + doTestParticleInsertion(doTestParticleInsertion), haveMultipleDomains(numDDCells != nullptr), zones(ddZones) { @@ -83,14 +93,15 @@ GridSet::DomainSetup::DomainSetup(const int ePBC, } GridSet::GridSet(const int ePBC, + const bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones, const PairlistType pairlistType, const bool haveFep, const int numThreads, gmx::PinningPolicy pinningPolicy) : - domainSetup_(ePBC, numDDCells, ddZones), - grids_(numDDZones(domainSetup_.haveMultipleDomainsPerDim), Grid(pairlistType, haveFep_)), + domainSetup_(ePBC, doTestParticleInsertion, numDDCells, ddZones), + grids_(numGrids(domainSetup_), Grid(pairlistType, haveFep_)), haveFep_(haveFep), numRealAtomsLocal_(0), numRealAtomsTotal_(0), @@ -123,7 +134,7 @@ void GridSet::setLocalAtomOrder() // TODO: Move to gridset.cpp void GridSet::putOnGrid(const matrix box, - const int ddZone, + const int gridIndex, const rvec lowerCorner, const rvec upperCorner, const gmx::UpdateGroupsCog *updateGroupsCog, @@ -136,29 +147,29 @@ void GridSet::putOnGrid(const matrix box, const int *move, nbnxn_atomdata_t *nbat) { - Nbnxm::Grid &grid = grids_[ddZone]; + Nbnxm::Grid &grid = grids_[gridIndex]; int cellOffset; - if (ddZone == 0) + if (gridIndex == 0) { cellOffset = 0; } else { - const Nbnxm::Grid &previousGrid = grids_[ddZone - 1]; + const Nbnxm::Grid &previousGrid = grids_[gridIndex - 1]; cellOffset = previousGrid.atomIndexEnd()/previousGrid.geometry().numAtomsPerCell; } const int n = atomEnd - atomStart; real maxAtomGroupRadius; - if (ddZone == 0) + if (gridIndex == 0) { copy_mat(box, box_); numRealAtomsLocal_ = atomEnd - numAtomsMoved; /* We assume that nbnxn_put_on_grid is called first - * for the local atoms (ddZone=0). + * for the local atoms (gridIndex=0). */ numRealAtomsTotal_ = atomEnd - numAtomsMoved; @@ -182,8 +193,9 @@ void GridSet::putOnGrid(const matrix box, /* We always use the home zone (grid[0]) for setting the cell size, * since determining densities for non-local zones is difficult. */ - // grid data used in GPU transfers inherits the gridset pinnin policy - auto pinPolicy = gridSetData_.cells.get_allocator().pinningPolicy(); + const int ddZone = (domainSetup_.doTestParticleInsertion ? 
0 : gridIndex); + // grid data used in GPU transfers inherits the gridset pinning policy + auto pinPolicy = gridSetData_.cells.get_allocator().pinningPolicy(); grid.setDimensions(ddZone, n - numAtomsMoved, lowerCorner, upperCorner, atomDensity, @@ -220,11 +232,11 @@ grid.setCellIndices(ddZone, cellOffset, &gridSetData_, gridWork_, atomStart, atomEnd, atomInfo.data(), x, numAtomsMoved, nbat); - if (ddZone == 0) + if (gridIndex == 0) { nbat->natoms_local = nbat->numAtoms(); } - if (ddZone == gmx::ssize(grids_) - 1) + if (gridIndex == gmx::ssize(grids_) - 1) { /* We are done setting up all grids, we can resize the force buffers */ nbat->resizeForceBuffers(); diff --git a/src/gromacs/nbnxm/gridset.h b/src/gromacs/nbnxm/gridset.h index 38d9650e4f..142a1f28b4 100644 --- a/src/gromacs/nbnxm/gridset.h +++ b/src/gromacs/nbnxm/gridset.h @@ -73,6 +73,12 @@ namespace Nbnxm /*! \internal * \brief Holds a set of search grids for the local + non-local DD zones + * + * There are three different possible setups: + * - a single grid, this is the standard case without domain decomposition + * - one grid for each domain decomposition zone + * - with test particle insertion there are two grids, one for the system + * to insert in and one for the molecule that is inserted */ class GridSet { @@ -84,11 +90,14 @@ class GridSet { //! Constructor, without DD \p numDDCells and \p ddZones should be nullptr DomainSetup(int ePBC, + bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones); //! The type of PBC int ePBC; + //! Tells whether we are doing test-particle insertion + bool doTestParticleInsertion; //! Are there multiple domains? bool haveMultipleDomains; //! Are there multiple domains along each dimension? @@ -99,6 +108,7 @@ class GridSet //! Constructs a grid set for 1 or multiple DD zones, when numDDCells!=nullptr GridSet(int ePBC, + bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones, PairlistType pairlistType, @@ -106,9 +116,9 @@ class GridSet int numThreads, gmx::PinningPolicy pinningPolicy); - //! Puts the atoms in \p ddZone on the grid and copies the coordinates to \p nbat + //! Puts the atoms on the grid with index \p gridIndex and copies the coordinates to \p nbat void putOnGrid(const matrix box, - int ddZone, + int gridIndex, const rvec lowerCorner, const rvec upperCorner, const gmx::UpdateGroupsCog *updateGroupsCog, diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp index 6715fef9c2..e87e6363c5 100644 --- a/src/gromacs/nbnxm/nbnxm_setup.cpp +++ b/src/gromacs/nbnxm/nbnxm_setup.cpp @@ -472,6 +472,7 @@ init_nb_verlet(const gmx::MDLogger &mdlog, auto pairSearch = std::make_unique<PairSearch>(ir->ePBC, + EI_TPI(ir->eI), DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr, DOMAINDECOMP(cr) ?
domdec_zones(cr->dd) : nullptr, pairlistParams.pairlistType, diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp index 43ed81cd32..85bb369207 100644 --- a/src/gromacs/nbnxm/pairlist.cpp +++ b/src/gromacs/nbnxm/pairlist.cpp @@ -4041,6 +4041,11 @@ PairlistSet::constructPairlists(const Nbnxm::GridSet &gridSet, for (int zi = 0; zi < nzi; zi++) { + /* With TPI we do grid 1, the inserted molecule, versus grid 0, the rest */ + if (gridSet.domainSetup().doTestParticleInsertion) + { + zi = 1; + } const Grid &iGrid = gridSet.grids()[zi]; int zj0; diff --git a/src/gromacs/nbnxm/pairsearch.cpp b/src/gromacs/nbnxm/pairsearch.cpp index 021f6cb25a..2d425de5ef 100644 --- a/src/gromacs/nbnxm/pairsearch.cpp +++ b/src/gromacs/nbnxm/pairsearch.cpp @@ -109,13 +109,14 @@ PairsearchWork::~PairsearchWork() } PairSearch::PairSearch(const int ePBC, + const bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones, const PairlistType pairlistType, const bool haveFep, const int maxNumThreads, gmx::PinningPolicy pinningPolicy) : - gridSet_(ePBC, numDDCells, ddZones, pairlistType, haveFep, maxNumThreads, pinningPolicy), + gridSet_(ePBC, doTestParticleInsertion, numDDCells, ddZones, pairlistType, haveFep, maxNumThreads, pinningPolicy), work_(maxNumThreads) { cycleCounting_.recordCycles_ = (getenv("GMX_NBNXN_CYCLE") != nullptr); diff --git a/src/gromacs/nbnxm/pairsearch.h b/src/gromacs/nbnxm/pairsearch.h index 10ab4d15f9..7bf8f421a3 100644 --- a/src/gromacs/nbnxm/pairsearch.h +++ b/src/gromacs/nbnxm/pairsearch.h @@ -207,6 +207,7 @@ class PairSearch * \param[in] maxNumThreads The maximum number of threads used in the search */ PairSearch(int ePBC, + bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *zones, PairlistType pairlistType, diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp index 452a771616..a7e7b0a995 100644 --- a/src/gromacs/taskassignment/resourcedivision.cpp +++ b/src/gromacs/taskassignment/resourcedivision.cpp @@ -655,7 +655,8 @@ static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt) void checkAndUpdateHardwareOptions(const gmx::MDLogger &mdlog, gmx_hw_opt_t *hw_opt, const bool isSimulationMasterRank, - const int nPmeRanks) + const int nPmeRanks, + const t_inputrec *inputrec) { /* Currently hw_opt only contains default settings or settings supplied * by the user on the command line. @@ -688,6 +689,18 @@ void checkAndUpdateHardwareOptions(const gmx::MDLogger &mdlog, "compiled without thread-MPI"); } } + + /* With thread-MPI we need to handle TPI and #OpenMP-threads=auto early, + * so we can parallelize using MPI only. The general check is done later. + */ + if (GMX_THREAD_MPI && isSimulationMasterRank) + { + GMX_RELEASE_ASSERT(inputrec, "Expect a valid inputrec"); + if (EI_TPI(inputrec->eI) && hw_opt->nthreads_omp == 0) + { + hw_opt->nthreads_omp = 1; + } + } /* With thread-MPI the master thread sets hw_opt->totNumThreadsIsAuto. * The other threads receive a partially processed hw_opt from the master * thread and should not set hw_opt->totNumThreadsIsAuto again. 
@@ -801,8 +814,18 @@ void checkAndUpdateRequestedNumOpenmpThreads(gmx_hw_opt_t *hw_opt, const gmx_multisim_t *ms, int numRanksOnThisNode, PmeRunMode pmeRunMode, - const gmx_mtop_t &mtop) + const gmx_mtop_t &mtop, + const t_inputrec &inputrec) { + if (EI_TPI(inputrec.eI)) + { + if (hw_opt->nthreads_omp > 1) + { + gmx_fatal(FARGS, "You requested OpenMP parallelization, which is not supported with TPI."); + } + hw_opt->nthreads_omp = 1; + } + if (GMX_THREAD_MPI) { diff --git a/src/gromacs/taskassignment/resourcedivision.h b/src/gromacs/taskassignment/resourcedivision.h index 6d8f1c2354..2bea6245a0 100644 --- a/src/gromacs/taskassignment/resourcedivision.h +++ b/src/gromacs/taskassignment/resourcedivision.h @@ -104,11 +104,13 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo, * \param[in, out] hw_opt Hardware-related and threading options * \param[in] isSimulationMasterRank * \param[in] nPmeRanks Number of PME ranks + * \param[in] inputrec The input record, should point to a valid object when \p isSimulationMasterRank = true * */ void checkAndUpdateHardwareOptions(const gmx::MDLogger &mdlog, gmx_hw_opt_t *hw_opt, bool isSimulationMasterRank, - int nPmeRanks); + int nPmeRanks, + const t_inputrec *inputrec); /*! \brief Check, and if necessary update, the number of OpenMP threads requested * @@ -120,7 +122,8 @@ void checkAndUpdateRequestedNumOpenmpThreads(gmx_hw_opt_t *hw_opt, const gmx_multisim_t *ms, int numRanksOnThisNode, PmeRunMode pmeRunMode, - const gmx_mtop_t &mtop); + const gmx_mtop_t &mtop, + const t_inputrec &inputrec); namespace gmx { diff --git a/src/programs/mdrun/tests/CMakeLists.txt b/src/programs/mdrun/tests/CMakeLists.txt index d1b4c0f4ad..da17cd990f 100644 --- a/src/programs/mdrun/tests/CMakeLists.txt +++ b/src/programs/mdrun/tests/CMakeLists.txt @@ -83,13 +83,26 @@ gmx_add_gtest_executable( normalmodes.cpp rerun.cpp simple_mdrun.cpp - tpitest.cpp # pseudo-library for code for mdrun $<TARGET_OBJECTS:mdrun_objlib> ) target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure) gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST) +# TPI does not support OpenMP, so we need a separate test binary +set(testname "MdrunTpiTests") +set(exename "mdrun-tpi-test") + +gmx_add_gtest_executable( + ${exename} + # files with code for tests + tpitest.cpp + # pseudo-library for code for mdrun + $<TARGET_OBJECTS:mdrun_objlib> + ) +target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure) +gmx_register_gtest_test(${testname} ${exename} INTEGRATION_TEST) + # Tests that only make sense to run with multiple ranks and/or real # MPI are implemented here. set(testname "MdrunMpiTests") diff --git a/src/programs/mdrun/tests/tpitest.cpp b/src/programs/mdrun/tests/tpitest.cpp index 1258a57f9d..b080e4fbff 100644 --- a/src/programs/mdrun/tests/tpitest.cpp +++ b/src/programs/mdrun/tests/tpitest.cpp @@ -127,6 +127,7 @@ TEST_P(TpiTest, ReproducesOutput) epsilon-r = 1 epsilon-rf = 0 vdw-type = cut-off + vdw-modifier = none rvdw = 0.9 Tcoupl = no tc-grps = System @@ -139,8 +140,7 @@ TEST_P(TpiTest, ReproducesOutput) runTest(); } -// Should be re-enabled when TPI supports the Verlet scheme -INSTANTIATE_TEST_CASE_P(DISABLED_Simple, TpiTest, ::testing::Values(1993, 2994)); +INSTANTIATE_TEST_CASE_P(Simple, TpiTest, ::testing::Values(1993, 2994)); } // namespace } // namespace test -- 2.22.0
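Reviewer note on the grid setup: the bookkeeping changed in gridset.cpp above can be summarized as follows: without domain decomposition there is a single search grid, with domain decomposition there is one grid per DD zone, and with TPI there are always exactly two grids, one holding the system to insert into and one holding the inserted molecule. The sketch below restates the numGrids() logic from this patch using only standard C++ types; the function and type names are illustrative and not part of the GROMACS API.

    #include <array>

    // Number of nbnxm search grids, mirroring numGrids() in gridset.cpp above:
    // TPI always uses two grids (system + inserted molecule); otherwise the
    // count doubles for every dimension that has more than one DD domain.
    static int numSearchGrids(bool doTestParticleInsertion,
                              const std::array<bool, 3> &haveMultipleDomainsPerDim)
    {
        if (doTestParticleInsertion)
        {
            return 2;
        }
        int numGrids = 1;
        for (bool haveDD : haveMultipleDomainsPerDim)
        {
            if (haveDD)
            {
                numGrids *= 2;
            }
        }
        return numGrids;
    }

The pairlist is then built as grid 1 (the inserted molecule) versus grid 0 (the rest), which is why pairlist.cpp starts the i-grid loop at zi = 1 in the TPI case.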
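Reviewer note on the pairlist radius: with TPI the pairlist is constructed with all inserted atoms placed at the center of the insertion sphere and is then reused for many trial positions and orientations, so tpi.cpp sets rlist = max(rvdw, rcoulomb) + 2*rtpi + 2*molRadius, where molRadius is the largest distance of any inserted atom from the molecule's center of geometry. A minimal standalone sketch of that arithmetic, assuming plain double-precision vectors (Vec3, moleculeRadius and tpiPairlistRadius are illustrative names, not GROMACS code):

    #include <algorithm>
    #include <array>
    #include <cmath>
    #include <vector>

    using Vec3 = std::array<double, 3>;

    // Largest distance of any atom of the inserted molecule from its center of geometry
    static double moleculeRadius(const std::vector<Vec3> &molecule)
    {
        Vec3 cog = { 0.0, 0.0, 0.0 };
        for (const Vec3 &atom : molecule)
        {
            for (int d = 0; d < 3; d++)
            {
                cog[d] += atom[d] / molecule.size();
            }
        }
        double maxDistanceSquared = 0.0;
        for (const Vec3 &atom : molecule)
        {
            double distanceSquared = 0.0;
            for (int d = 0; d < 3; d++)
            {
                distanceSquared += (atom[d] - cog[d]) * (atom[d] - cog[d]);
            }
            maxDistanceSquared = std::max(maxDistanceSquared, distanceSquared);
        }
        return std::sqrt(maxDistanceSquared);
    }

    // Pairlist radius for TPI, mirroring the formula used in tpi.cpp above:
    //   rlist = max(rvdw, rcoulomb) + 2*rtpi + 2*molRadius
    static double tpiPairlistRadius(double rvdw, double rcoulomb, double rtpi,
                                    const std::vector<Vec3> &molecule)
    {
        return std::max(rvdw, rcoulomb) + 2.0*rtpi + 2.0*moleculeRadius(molecule);
    }

Because this radius covers every insertion the list will be reused for, no Verlet buffer tolerance is needed, which is why check_ir() sets verletbuf_tol = 0 for TPI.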
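Reviewer note on the reaction-field correction: since the TPI pairlist only contains inserted-molecule-versus-system pairs, the intramolecular (excluded) pairs of the inserted molecule never reach the nonbonded kernels, yet with reaction field they still carry the distance-dependent term q_i q_j (k_rf r^2 - c_rf) plus a per-charge self term -1/2 q_i^2 c_rf, all scaled by the electrostatic conversion factor. The patch therefore adds this contribution analytically via reactionFieldExclusionCorrection() in tpi.cpp. A standalone restatement with plain types follows; the struct and function names are illustrative only, and like the patch it treats every pair within the inserted molecule as excluded.

    #include <array>
    #include <cstddef>
    #include <vector>

    struct ReactionFieldParams
    {
        double k_rf;    // quadratic reaction-field coefficient
        double c_rf;    // constant reaction-field shift
        double epsfac;  // electrostatic conversion factor
    };

    // Reaction-field energy of all intramolecular pairs of the inserted molecule
    // plus the per-charge self term, mirroring reactionFieldExclusionCorrection().
    static double rfExclusionEnergy(const std::vector<double>                &charges,
                                    const std::vector<std::array<double, 3>> &positions,
                                    const ReactionFieldParams                &rf)
    {
        double energy = 0.0;
        for (std::size_t i = 0; i < charges.size(); i++)
        {
            energy -= 0.5*charges[i]*charges[i]*rf.c_rf;
            for (std::size_t j = i + 1; j < charges.size(); j++)
            {
                double r2 = 0.0;
                for (int d = 0; d < 3; d++)
                {
                    r2 += (positions[i][d] - positions[j][d])*(positions[i][d] - positions[j][d]);
                }
                energy += charges[i]*charges[j]*(rf.k_rf*r2 - rf.c_rf);
            }
        }
        return rf.epsfac*energy;
    }

This energy is added to F_EPOT once per frame and replaces the old group-scheme F_RF_EXCL term in the sum_UgembU accumulation.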