Enable TPI with the Verlet cut-off scheme
author    Berk Hess <hess@kth.se>
Tue, 10 Sep 2019 09:01:19 +0000 (11:01 +0200)
committer Magnus Lundborg <magnus.lundborg@scilifelab.se>
Thu, 12 Sep 2019 09:15:57 +0000 (11:15 +0200)
This requires special treatment of the nbnxm gridding and search,
with one grid for the system to insert into and one grid for
the molecule being inserted.
Also re-enabled the TPI tests.

Change-Id: Ia3888daf4b06a94b0a7a03ec17b7bbe27084ac3d
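
For orientation, the core of the new search flow in do_tpi() is the call
sequence below. This is a condensed sketch assembled verbatim from the
tpi.cpp hunks in this change; it is not a self-contained unit, since all
types and functions are GROMACS internals.

    /* Grid 0: all system atoms except the molecule to insert (atoms [0, a_tp0)) */
    nbnxn_put_on_grid(fr->nbv.get(), box,
                      0, vzero, boxDiagonal,
                      nullptr, 0, a_tp0, -1,
                      fr->cginfo, x,
                      0, nullptr);

    /* Grid 1: the inserted molecule alone; the lower and upper corners
     * coincide at the trial position, which Grid::setDimensions now
     * handles explicitly. */
    nbnxn_put_on_grid(fr->nbv.get(), box,
                      1, x_init, x_init,
                      nullptr, a_tp0, a_tp1, -1,
                      fr->cginfo, x,
                      0, nullptr);

    /* The pair search then runs grid 1 (the molecule) against grid 0 (the rest) */
    fr->nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
                               &top.excls, step, nrnb);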

18 files changed:
docs/user-guide/mdp-options.rst
src/gromacs/gmxpreprocess/readir.cpp
src/gromacs/mdlib/forcerec.cpp
src/gromacs/mdlib/sim_util.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/mdrun/tpi.cpp
src/gromacs/nbnxm/grid.cpp
src/gromacs/nbnxm/grid.h
src/gromacs/nbnxm/gridset.cpp
src/gromacs/nbnxm/gridset.h
src/gromacs/nbnxm/nbnxm_setup.cpp
src/gromacs/nbnxm/pairlist.cpp
src/gromacs/nbnxm/pairsearch.cpp
src/gromacs/nbnxm/pairsearch.h
src/gromacs/taskassignment/resourcedivision.cpp
src/gromacs/taskassignment/resourcedivision.h
src/programs/mdrun/tests/CMakeLists.txt
src/programs/mdrun/tests/tpitest.cpp

index 711c8771280c9a292501851e9eecf5590491d53b..1f76fd1972c857ed230a23092880d4bfbb9c8bc5 100644 (file)
@@ -563,10 +563,12 @@ Neighbor searching
 .. mdp:: rlist
 
    (1) [nm]
-   Cut-off distance for the short-range neighbor list. With the
-   :mdp-value:`cutoff-scheme=Verlet` :mdp:`cutoff-scheme`, this is by default set by the
-   :mdp:`verlet-buffer-tolerance` option and the value of
-   :mdp:`rlist` is ignored.
+   Cut-off distance for the short-range neighbor list. With dynamics,
+   this is by default set by the :mdp:`verlet-buffer-tolerance` option
+   and the value of :mdp:`rlist` is ignored. Without dynamics, this
+   is by default set to the maximum cut-off plus a 5% buffer, except
+   for test particle insertion, where the buffer is managed exactly
+   and automatically.
 
 
 Electrostatics
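
To make "managed exactly and automatically" concrete: for TPI the pairlist
cut-off is computed at mdrun time (see the tpi.cpp hunk further down) as
rlist = max(rvdw, rcoulomb) + 2*rtpi + 2*molRadius. A minimal worked
instance with illustrative numbers:

    #include <algorithm>

    // Illustrative values only; the formula is the one introduced in tpi.cpp.
    double rvdw      = 0.9;   // nm
    double rcoulomb  = 0.9;   // nm
    double rtpi      = 0.05;  // nm, radius of the insertion sphere
    double molRadius = 0.1;   // nm, radius of the inserted molecule
    double maxCutoff = std::max(rvdw, rcoulomb);          // 0.9 nm
    double rlist     = maxCutoff + 2*rtpi + 2*molRadius;  // 0.9 + 0.1 + 0.2 = 1.2 nm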
index 8f92c0cacd775fc82a98f4f30ef4b0c906cc24cd..c0623751e85bfc4d3ebe0cd3e5134ce04eaaa329 100644 (file)
@@ -368,7 +368,13 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts,
 
         rc_max = std::max(ir->rvdw, ir->rcoulomb);
 
-        if (ir->verletbuf_tol <= 0)
+        if (EI_TPI(ir->eI))
+        {
+            /* With TPI we set the pairlist cut-off later using the radius of the inserted molecule */
+            ir->verletbuf_tol = 0;
+            ir->rlist         = rc_max;
+        }
+        else if (ir->verletbuf_tol <= 0)
         {
             if (ir->verletbuf_tol == 0)
             {
@@ -546,8 +552,6 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts,
         CHECK(ir->nstlist <= 0);
         sprintf(err_buf, "TPI does not work with full electrostatics other than PME");
         CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype));
-        sprintf(err_buf, "TPI does not work (yet) with the Verlet cut-off scheme");
-        CHECK(ir->cutoff_scheme == ecutsVERLET);
     }
 
     /* SHAKE / LINCS */
index 8230129f2bcb524c3be25bdf67a64ad4cbedc407..0b478cf1106799866f2359268347a074fee7c634 100644 (file)
@@ -1475,7 +1475,6 @@ void init_forcerec(FILE                             *fp,
     real           rtab;
     char          *env;
     double         dbl;
-    const t_block *cgs;
     gmx_bool       bGenericKernelOnly;
     gmx_bool       bFEP_NonBonded;
 
@@ -1491,16 +1490,8 @@ void init_forcerec(FILE                             *fp,
     if (EI_TPI(ir->eI))
     {
         /* Set to the size of the molecule to be inserted (the last one) */
-        /* Because of old style topologies, we have to use the last cg
-         * instead of the last molecule type.
-         */
-        cgs       = &mtop->moltype[mtop->molblock.back().type].cgs;
-        fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
         gmx::RangePartitioning molecules = gmx_mtop_molecules(*mtop);
-        if (fr->n_tpi != molecules.block(molecules.numBlocks() - 1).size())
-        {
-            gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
-        }
+        fr->n_tpi = molecules.block(molecules.numBlocks() - 1).size();
     }
     else
     {
index 768ca4577733cfc69b43a9bc73dca0946a8b411a..945165cdc90654067af3f8c756fa4f0b03dd09d9 100644 (file)
@@ -1118,7 +1118,7 @@ void do_force(FILE                                     *fplog,
             nbv->atomdata_init_add_nbat_f_to_f_gpu();
         }
     }
-    else
+    else if (!EI_TPI(inputrec->eI))
     {
         if (useGpuXBufOps == BufferOpsUseGpu::True)
         {
@@ -1376,13 +1376,16 @@ void do_force(FILE                                     *fplog,
                          step, nrnb, wcycle);
         }
 
-        /* Add all the non-bonded force to the normal force array.
-         * This can be split into a local and a non-local part when overlapping
-         * communication with calculation with domain decomposition.
-         */
-        wallcycle_stop(wcycle, ewcFORCE);
-        nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force());
-        wallcycle_start_nocount(wcycle, ewcFORCE);
+        if (forceFlags.computeForces)
+        {
+            /* Add all the non-bonded force to the normal force array.
+             * This can be split into a local and a non-local part when overlapping
+             * communication with calculation with domain decomposition.
+             */
+            wallcycle_stop(wcycle, ewcFORCE);
+            nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force());
+            wallcycle_start_nocount(wcycle, ewcFORCE);
+        }
 
         /* If there are multiple fshift output buffers we need to reduce them */
         if (forceFlags.computeVirial)
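
This guard matters because the TPI path calls do_force() for energies only
(see the tpi.cpp hunk below), so there is no nonbonded force output to
reduce. A minimal sketch of the flag logic, assuming computeForces mirrors
the GMX_FORCE_FORCES bit as elsewhere in this code:

    /* Energies only, matching the do_force() call in do_tpi() */
    const int  legacyFlags   = GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY;
    const bool computeForces = ((legacyFlags & GMX_FORCE_FORCES) != 0); // false for TPI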
index 64248d311dd172402f3d6a564d7cda107ce81db8..5caa3caee57552048aceb65916164c6917b013f2 100644 (file)
@@ -394,24 +394,36 @@ namespace gmx
  * falling back to CPU code. With thread-MPI, only the first
  * call to this function should have \c issueWarning true. */
 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
-                                               const t_inputrec *ir,
+                                               const t_inputrec &ir,
                                                bool              issueWarning)
 {
-    if (ir->opts.ngener - ir->nwall > 1)
+    bool        gpuIsUseful = true;
+    std::string warning;
+
+    if (ir.opts.ngener - ir.nwall > 1)
     {
         /* The GPU code does not support more than one energy group.
          * If the user requested GPUs explicitly, a fatal error is given later.
          */
-        if (issueWarning)
-        {
-            GMX_LOG(mdlog.warning).asParagraph()
-                .appendText("Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
-                            "For better performance, run on the GPU without energy groups and then do "
-                            "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.");
-        }
-        return false;
+        gpuIsUseful = false;
+        warning     =
+            "Multiple energy groups are not implemented for GPUs, falling back to the CPU. "
+            "For better performance, run on the GPU without energy groups and then use "
+            "gmx mdrun -rerun on the trajectory with an energy-group .tpr file.";
     }
-    return true;
+
+    if (EI_TPI(ir.eI))
+    {
+        gpuIsUseful = false;
+        warning     = "TPI is not implemented for GPUs.";
+    }
+
+    if (!gpuIsUseful && issueWarning)
+    {
+        GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
+    }
+
+    return gpuIsUseful;
 }
 
 //! Initializes the logger for mdrun.
@@ -605,8 +617,6 @@ int Mdrunner::mdrunner()
 
     /* CAUTION: threads may be started later on in this function, so
        cr doesn't reflect the final parallel state right now */
-    t_inputrec                      inputrecInstance;
-    t_inputrec                     *inputrec = &inputrecInstance;
     gmx_mtop_t                      mtop;
 
     bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data());
@@ -659,6 +669,9 @@ int Mdrunner::mdrunner()
     // Print citation requests after all software/hardware printing
     pleaseCiteGromacs(fplog);
 
+    // TODO Replace this with a unique_ptr once t_inputrec is C++
+    t_inputrec               inputrecInstance;
+    t_inputrec              *inputrec = nullptr;
     std::unique_ptr<t_state> globalState;
 
     if (SIMMASTER(cr))
@@ -667,11 +680,13 @@ int Mdrunner::mdrunner()
         globalState = std::make_unique<t_state>();
 
         /* Read (nearly) all data required for the simulation */
-        read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), inputrec, globalState.get(), &mtop);
+        read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()),
+                       &inputrecInstance, globalState.get(), &mtop);
+        inputrec = &inputrecInstance;
     }
 
     /* Check and update the hardware options for internal consistency */
-    checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks);
+    checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks, inputrec);
 
     if (GMX_THREAD_MPI && SIMMASTER(cr))
     {
@@ -679,6 +694,8 @@ int Mdrunner::mdrunner()
         bool useGpuForPme       = false;
         try
         {
+            GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
+
             // If the user specified the number of ranks, then we must
             // respect that, but in default mode, we need to allow for
             // the number of GPUs to choose the number of ranks.
@@ -686,7 +703,7 @@ int Mdrunner::mdrunner()
             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
                     canUseGpuForNonbonded,
-                    gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
+                    gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
                     hw_opt.nthreads_tmpi);
             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
@@ -723,8 +740,13 @@ int Mdrunner::mdrunner()
     if (PAR(cr))
     {
         /* now broadcast everything to the non-master nodes/threads: */
+        if (!SIMMASTER(cr))
+        {
+            inputrec = &inputrecInstance;
+        }
         init_parallel(cr, inputrec, &mtop);
     }
+    GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
 
     // Now each rank knows the inputrec that SIMMASTER read and used,
     // and (if applicable) cr->nnodes has been assigned the number of
@@ -754,7 +776,7 @@ int Mdrunner::mdrunner()
         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
                                                                 emulateGpuNonbonded,
                                                                 canUseGpuForNonbonded,
-                                                                gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
+                                                                gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI),
                                                                 gpusWereDetected);
         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
                                                     *hwinfo, *inputrec, mtop,
@@ -1044,7 +1066,7 @@ int Mdrunner::mdrunner()
                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
     /* Check and update the number of OpenMP threads requested */
     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
-                                            pmeRunMode, mtop);
+                                            pmeRunMode, mtop, *inputrec);
 
     gmx_omp_nthreads_init(mdlog, cr,
                           hwinfo->nthreads_hw_avail,
index 382a32be2ff2adaf7a4141c2daa2f6dc2fc023dc..3afadf161f6a34883af86f22fbca8070bbd0277f 100644 (file)
@@ -70,6 +70,7 @@
 #include "gromacs/mdlib/energyoutput.h"
 #include "gromacs/mdlib/force.h"
 #include "gromacs/mdlib/force_flags.h"
+#include "gromacs/mdlib/gmx_omp_nthreads.h"
 #include "gromacs/mdlib/mdatoms.h"
 #include "gromacs/mdlib/tgroup.h"
 #include "gromacs/mdlib/update.h"
@@ -82,6 +83,7 @@
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/mdtypes/mdrunoptions.h"
 #include "gromacs/mdtypes/state.h"
+#include "gromacs/nbnxm/nbnxm.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/random/threefry.h"
 #include "gromacs/random/uniformrealdistribution.h"
@@ -129,6 +131,31 @@ static void realloc_bins(double **bin, int *nbin, int nbin_new)
     }
 }
 
+//! Computes and returns the RF exclusion energy for the last molecule starting at \p beginAtom
+static real
+reactionFieldExclusionCorrection(gmx::ArrayRef<const gmx::RVec> x,
+                                 const t_mdatoms               &mdatoms,
+                                 const interaction_const_t     &ic,
+                                 const int                      beginAtom)
+{
+    real energy = 0;
+
+    for (int i = beginAtom; i < mdatoms.homenr; i++)
+    {
+        const real qi = mdatoms.chargeA[i];
+        energy -= 0.5*qi*qi*ic.c_rf;
+
+        for (int j = i + 1; j < mdatoms.homenr; j++)
+        {
+            const real qj  = mdatoms.chargeA[j];
+            const real rsq = distance2(x[i], x[j]);
+            energy += qi*qj*(ic.k_rf*rsq - ic.c_rf);
+        }
+    }
+
+    return ic.epsfac*energy;
+}
+
 namespace gmx
 {
 
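
In closed form, the helper added above evaluates, with f = ic.epsfac,
k = ic.k_rf, c = ic.c_rf, and the sums running over the atoms of the
inserted molecule:

    E_{\mathrm{rf,excl}} = f \left[ \sum_{i<j} q_i q_j \left( k\, r_{ij}^2 - c \right) - \frac{c}{2} \sum_i q_i^2 \right]

These terms are constant for a rigid inserted molecule, which is why the
code computes them once up front rather than per trial insertion.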
@@ -136,6 +163,8 @@ namespace gmx
 void
 LegacySimulator::do_tpi()
 {
+    GMX_RELEASE_ASSERT(gmx_omp_nthreads_get(emntDefault) == 1, "TPI does not support OpenMP");
+
     gmx_localtop_t          top;
     PaddedVector<gmx::RVec> f {};
     real                    lambda, t, temp, beta, drmax, epot;
@@ -146,7 +175,7 @@ LegacySimulator::do_tpi()
     tensor                  force_vir, shake_vir, vir, pres;
     int                     a_tp0, a_tp1, ngid, gid_tp, nener, e;
     rvec                   *x_mol;
-    rvec                    mu_tot, x_init, dx, x_tp;
+    rvec                    mu_tot, x_init, dx;
     int                     nnodes, frame;
     int64_t                 frame_step_prev, frame_step;
     int64_t                 nsteps, stepblocksize = 0, step;
@@ -177,11 +206,6 @@ LegacySimulator::do_tpi()
     real bU_bin_limit      = 50;
     real bU_logV_bin_limit = bU_bin_limit + 10;
 
-    if (inputrec->cutoff_scheme == ecutsVERLET)
-    {
-        gmx_fatal(FARGS, "TPI does not work (yet) with the Verlet cut-off scheme");
-    }
-
     nnodes = cr->nnodes;
 
     gmx_mtop_generate_local_top(*top_global, &top, inputrec->efep != efepNO);
@@ -279,13 +303,31 @@ LegacySimulator::do_tpi()
         fprintf(debug, "TPI atoms %d-%d\n", a_tp0, a_tp1);
     }
 
-    GMX_RELEASE_ASSERT(inputrec->rcoulomb <= inputrec->rlist && inputrec->rvdw <= inputrec->rlist, "Twin-range interactions are not supported with TPI");
+    auto x = makeArrayRef(state_global->x);
+
+    if (EEL_PME(fr->ic->eeltype))
+    {
+        gmx_pme_reinit_atoms(fr->pmedata, a_tp0, nullptr);
+    }
+
+    /* With reaction-field we have distance-dependent potentials
+     * between excluded atoms, so we need to add these separately
+     * for the inserted molecule.
+     */
+    real rfExclusionEnergy = 0;
+    if (EEL_RF(fr->ic->eeltype))
+    {
+        rfExclusionEnergy = reactionFieldExclusionCorrection(x, *mdatoms, *fr->ic, a_tp0);
+        if (debug)
+        {
+            fprintf(debug, "RF exclusion correction for inserted molecule: %f kJ/mol\n", rfExclusionEnergy);
+        }
+    }
 
     snew(x_mol, a_tp1-a_tp0);
 
     bDispCorr = (inputrec->eDispCorr != edispcNO);
     bCharge   = FALSE;
-    auto x = makeArrayRef(state_global->x);
     for (i = a_tp0; i < a_tp1; i++)
     {
         /* Copy the coordinates of the molecule to be inserted */
@@ -296,12 +338,24 @@ LegacySimulator::do_tpi()
     }
     bRFExcl = (bCharge && EEL_RF(fr->ic->eeltype));
 
-    // TODO: Calculate the center of geometry of the molecule to insert
-#if 0
-    calc_cgcm(fplog, cg_tp, cg_tp+1, &(top.cgs), state_global->x.rvec_array(), fr->cg_cm);
+    // Calculate the center of geometry of the molecule to insert
+    rvec cog = { 0, 0, 0 };
+    for (int a = a_tp0; a < a_tp1; a++)
+    {
+        rvec_inc(cog, x[a]);
+    }
+    svmul(1.0_real/(a_tp1 - a_tp0), cog, cog);
+    real molRadius = 0;
+    for (int a = a_tp0; a < a_tp1; a++)
+    {
+        molRadius = std::max(molRadius, distance2(x[a], cog));
+    }
+    molRadius = std::sqrt(molRadius);
+
+    const real maxCutoff = std::max(inputrec->rvdw, inputrec->rcoulomb);
     if (bCavity)
     {
-        if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog)
+        if (norm(cog) > 0.5*maxCutoff && fplog)
         {
             fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
             fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
@@ -312,10 +366,9 @@ LegacySimulator::do_tpi()
         /* Center the molecule to be inserted at zero */
         for (i = 0; i < a_tp1-a_tp0; i++)
         {
-            rvec_dec(x_mol[i], fr->cg_cm[cg_tp]);
+            rvec_dec(x_mol[i], cog);
         }
     }
-#endif
 
     if (fplog)
     {
@@ -330,6 +383,8 @@ LegacySimulator::do_tpi()
     {
         if (inputrec->nstlist > 1)
         {
+
+            /* With the same pair list we insert in a sphere of radius rtpi in different orientations */
             if (drmax == 0 && a_tp1-a_tp0 == 1)
             {
                 gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax);
@@ -348,12 +403,27 @@ LegacySimulator::do_tpi()
         }
     }
 
+    /* With the same pair list we insert in a sphere of radius rtpi
+     * in different orientations. We generate the pairlist with all
+     * inserted atoms located in the center of the sphere, so we need
+     * a buffer of twice the sphere radius plus twice the molecule radius.
+     */
+    inputrec->rlist = maxCutoff + 2*inputrec->rtpi + 2*molRadius;
+    fr->rlist       = inputrec->rlist;
+    fr->nbv->changePairlistRadii(inputrec->rlist, inputrec->rlist);
+
     ngid   = groups->groups[SimulationAtomGroupType::EnergyOutput].size();
-    // TODO: Figure out which energy group to use
-#if 0
-    gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]);
-#endif
-    gid_tp = 0;
+    gid_tp = GET_CGINFO_GID(fr->cginfo[a_tp0]);
+    for (int a = a_tp0 + 1; a < a_tp1; a++)
+    {
+        if (GET_CGINFO_GID(fr->cginfo[a]) != gid_tp)
+        {
+            fprintf(fplog,
+                    "NOTE: Atoms in the molecule to insert belong to different energy groups.\n"
+                    "      Only contributions to the group of the first atom will be reported.\n");
+            break;
+        }
+    }
     nener  = 1 + ngid;
     if (bDispCorr)
     {
@@ -503,13 +573,25 @@ LegacySimulator::do_tpi()
             copy_rvec(rerun_fr.x[i], x[i]);
         }
         copy_mat(rerun_fr.box, state_global->box);
+        const matrix &box = state_global->box;
 
-        V    = det(state_global->box);
+        V    = det(box);
         logV = log(V);
 
         bStateChanged = TRUE;
         bNS           = TRUE;
 
+        put_atoms_in_box(fr->ePBC, box, x);
+
+        /* Put all atoms except for the inserted ones on the grid */
+        rvec vzero       = { 0, 0, 0 };
+        rvec boxDiagonal = { box[XX][XX], box[YY][YY], box[ZZ][ZZ] };
+        nbnxn_put_on_grid(fr->nbv.get(), box,
+                          0, vzero, boxDiagonal,
+                          nullptr, 0, a_tp0, -1,
+                          fr->cginfo, x,
+                          0, nullptr);
+
         step = cr->nodeid*stepblocksize;
         while (step < nsteps)
         {
@@ -536,24 +618,6 @@ LegacySimulator::do_tpi()
                         x_init[d] = dist(rng)*state_global->box[d][d];
                     }
                 }
-
-                if (inputrec->nstlist == 1)
-                {
-                    copy_rvec(x_init, x_tp);
-                }
-                else
-                {
-                    /* Generate coordinates within |dx|=drmax of x_init */
-                    do
-                    {
-                        for (d = 0; d < DIM; d++)
-                        {
-                            dx[d] = (2*dist(rng) - 1)*drmax;
-                        }
-                    }
-                    while (norm2(dx) > drmax*drmax);
-                    rvec_add(x_init, dx, x_tp);
-                }
             }
             else
             {
@@ -587,6 +651,38 @@ LegacySimulator::do_tpi()
                         }
                     }
                 }
+            }
+
+            if (bNS)
+            {
+                for (int a = a_tp0; a < a_tp1; a++)
+                {
+                    x[a] = x_init;
+                }
+
+                /* Put the inserted molecule on its own search grid */
+                nbnxn_put_on_grid(fr->nbv.get(), box,
+                                  1, x_init, x_init,
+                                  nullptr, a_tp0, a_tp1, -1,
+                                  fr->cginfo, x,
+                                  0, nullptr);
+
+                /* TODO: Avoid updating all atoms at every bNS step */
+                fr->nbv->setAtomProperties(*mdatoms, fr->cginfo);
+
+                fr->nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
+                                           &top.excls, step, nrnb);
+
+                bNS = FALSE;
+            }
+
+            /* Add a random displacement uniformly distributed in a sphere
+             * of radius rtpi. We don't need to do this if we generate
+             * a new center location every step.
+             */
+            rvec x_tp;
+            if (bCavity || inputrec->nstlist > 1)
+            {
                 /* Generate coordinates within |dx|=drmax of x_init */
                 do
                 {
@@ -598,6 +694,10 @@ LegacySimulator::do_tpi()
                 while (norm2(dx) > drmax*drmax);
                 rvec_add(x_init, dx, x_tp);
             }
+            else
+            {
+                copy_rvec(x_init, x_tp);
+            }
 
             if (a_tp1 - a_tp0 == 1)
             {
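
The displacement above is drawn by rejection sampling: sample the enclosing
cube until the point lies inside the sphere of radius drmax. A standalone
sketch of the same idea in plain C++ (names are illustrative):

    #include <array>
    #include <random>

    /* Returns a point distributed uniformly within a sphere of radius rMax */
    std::array<double, 3> uniformInSphere(double rMax, std::mt19937 &rng)
    {
        std::uniform_real_distribution<double> dist(-1.0, 1.0);
        std::array<double, 3> dx;
        do
        {
            for (int d = 0; d < 3; d++)
            {
                dx[d] = dist(rng)*rMax;
            }
        }
        while (dx[0]*dx[0] + dx[1]*dx[1] + dx[2]*dx[2] > rMax*rMax);
        return dx;
    }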
@@ -624,18 +724,16 @@ LegacySimulator::do_tpi()
                 }
             }
 
+            /* Note: NonLocal refers to the inserted molecule */
+            fr->nbv->setCoordinates(Nbnxm::AtomLocality::NonLocal, false,
+                                    x, BufferOpsUseGpu::False, nullptr);
+
             /* Clear some matrix variables  */
             clear_mat(force_vir);
             clear_mat(shake_vir);
             clear_mat(vir);
             clear_mat(pres);
 
-            /* Set the center of geometry mass of the test molecule */
-            // TODO: Compute and set the COG
-#if 0
-            copy_rvec(x_init, fr->cg_cm[top.cgs.nr-1]);
-#endif
-
             /* Calc energy (no forces) on new positions.
              * Since we only need the intermolecular energy
              * and the RF exclusion terms of the inserted molecule occur
@@ -659,7 +757,6 @@ LegacySimulator::do_tpi()
                      state_global->lambda,
                      nullptr, fr, mdScheduleWork, nullptr, mu_tot, t, nullptr,
                      GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY |
-                     (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS : 0) |
                      (bStateChanged ? GMX_FORCE_STATECHANGED : 0),
                      DDBalanceRegionHandler(nullptr));
             std::feclearexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
@@ -667,7 +764,6 @@ LegacySimulator::do_tpi()
 
             cr->nnodes    = nnodes;
             bStateChanged = FALSE;
-            bNS           = FALSE;
 
             if (fr->dispersionCorrection)
             {
@@ -684,6 +780,10 @@ LegacySimulator::do_tpi()
             {
                 enerd->term[F_DISPCORR]  = 0;
             }
+            if (EEL_RF(fr->ic->eeltype))
+            {
+                enerd->term[F_EPOT]    += rfExclusionEnergy;
+            }
 
             epot               = enerd->term[F_EPOT];
             bEnergyOutOfBounds = FALSE;
@@ -750,7 +850,7 @@ LegacySimulator::do_tpi()
                     }
                     if (bRFExcl)
                     {
-                        sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU;
+                        sum_UgembU[e++] += rfExclusionEnergy*embU;
                     }
                     if (EEL_FULL(fr->ic->eeltype))
                     {
index a3c4b9d59c26c15738b6f044d07023f2081841fb..54b10eaf284dfee5c33d552f759b5b40856dbaad 100644 (file)
@@ -104,13 +104,32 @@ static real gridAtomDensity(int        numAtoms,
 
 void Grid::setDimensions(const int            ddZone,
                          const int            numAtoms,
-                         const rvec           lowerCorner,
-                         const rvec           upperCorner,
+                         gmx::RVec            lowerCorner,
+                         gmx::RVec            upperCorner,
                          real                 atomDensity,
                          const real           maxAtomGroupRadius,
                          const bool           haveFep,
                          gmx::PinningPolicy   pinningPolicy)
 {
+    /* We allow passing lowerCorner=upperCorner, in which case we need to
+     * create a finite-sized bounding box to avoid division by zero.
+     * We use a minimum size such that the volume fits in float with some
+     * margin for computing and using the atom number density.
+     */
+    constexpr real c_minimumGridSize = 1e-10;
+    for (int d = 0; d < DIM; d++)
+    {
+        GMX_ASSERT(upperCorner[d] >= lowerCorner[d], "Upper corner should not be smaller than the lower corner");
+        if (upperCorner[d] - lowerCorner[d] < c_minimumGridSize)
+        {
+            /* Ensure we apply a correction to the bounding box */
+            real correction = std::max(std::abs(lowerCorner[d])*GMX_REAL_EPS,
+                                       0.5_real*c_minimumGridSize);
+            lowerCorner[d] -= correction;
+            upperCorner[d] += correction;
+        }
+    }
+
     /* For the home zone we compute the density when not set (=-1) or when =0 */
     if (ddZone == 0 && atomDensity <= 0)
     {
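
A concrete instance of the correction above: for an insertion grid whose two
corners coincide at x = 5 nm in single precision (GMX_REAL_EPS ≈ 1.2e-7),

    correction = max(|5| * 1.2e-7, 0.5 * 1e-10)   // = max(6.0e-7, 5e-11)
               ≈ 6.0e-7 nm per side

so the degenerate bounding box acquires a tiny but finite volume before the
atom number density is computed.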
@@ -151,7 +170,7 @@ void Grid::setDimensions(const int            ddZone,
             tlen_y    = tlen*c_gpuNumClusterPerCellY;
         }
         /* We round ncx and ncy down, because we get less cell pairs
-         * in the nbsist when the fixed cell dimensions (x,y) are
+         * in the pairlist when the fixed cell dimensions (x,y) are
          * larger than the variable one (z) than the other way around.
          */
         dimensions_.numCells[XX] = std::max(1, static_cast<int>(size[XX]/tlen_x));
@@ -1409,12 +1428,9 @@ void Grid::setCellIndices(int                             ddZone,
         /* Set the cell indices for the moved particles */
         int n0 = numCellsTotal_*numAtomsPerCell;
         int n1 = numCellsTotal_*numAtomsPerCell + cxy_na_[numColumns()];
-        if (ddZone == 0)
+        for (int i = n0; i < n1; i++)
         {
-            for (int i = n0; i < n1; i++)
-            {
-                cells[atomIndices[i]] = i;
-            }
+            cells[atomIndices[i]] = i;
         }
     }
 
index 3da68f57724133578612bd1173642ec2fc15476d..4785f4a549855ea70732a99d5a5060e12752db62 100644 (file)
@@ -403,8 +403,8 @@ class Grid
         //! Sets the grid dimensions
         void setDimensions(int                   ddZone,
                            int                   numAtoms,
-                           const rvec            lowerCorner,
-                           const rvec            upperCorner,
+                           gmx::RVec             lowerCorner,
+                           gmx::RVec             upperCorner,
                            real                  atomDensity,
                            real                  maxAtomGroupRadius,
                            bool                  haveFep,
index 34369acf9498be2d1dbc67a48765827d3c1a2211..28a88bbeb90f0560f76f09c850c872d686db8ab9 100644 (file)
 namespace Nbnxm
 {
 
-//! Returns the number of DD zones
-static int numDDZones(const std::array<bool, DIM> &haveMultipleDomainsPerDim)
+//! Returns the number of search grids
+static int numGrids(const GridSet::DomainSetup &domainSetup)
 {
-    int  numDDZones = 1;
-    for (auto haveDD : haveMultipleDomainsPerDim)
+    int numGrids;
+    if (domainSetup.doTestParticleInsertion)
     {
-        if (haveDD)
+        numGrids = 2;
+    }
+    else
+    {
+        numGrids = 1;
+        for (auto haveDD : domainSetup.haveMultipleDomainsPerDim)
         {
-            numDDZones *= 2;
+            if (haveDD)
+            {
+                numGrids *= 2;
+            }
         }
     }
 
-    return numDDZones;
+    return numGrids;
 }
 
 GridSet::DomainSetup::DomainSetup(const int                 ePBC,
+                                  const bool                doTestParticleInsertion,
                                   const ivec               *numDDCells,
                                   const gmx_domdec_zones_t *ddZones) :
     ePBC(ePBC),
+    doTestParticleInsertion(doTestParticleInsertion),
     haveMultipleDomains(numDDCells != nullptr),
     zones(ddZones)
 {
@@ -83,14 +93,15 @@ GridSet::DomainSetup::DomainSetup(const int                 ePBC,
 }
 
 GridSet::GridSet(const int                 ePBC,
+                 const bool                doTestParticleInsertion,
                  const ivec               *numDDCells,
                  const gmx_domdec_zones_t *ddZones,
                  const PairlistType        pairlistType,
                  const bool                haveFep,
                  const int                 numThreads,
                  gmx::PinningPolicy        pinningPolicy) :
-    domainSetup_(ePBC, numDDCells, ddZones),
-    grids_(numDDZones(domainSetup_.haveMultipleDomainsPerDim), Grid(pairlistType, haveFep_)),
+    domainSetup_(ePBC, doTestParticleInsertion, numDDCells, ddZones),
+    grids_(numGrids(domainSetup_), Grid(pairlistType, haveFep_)),
     haveFep_(haveFep),
     numRealAtomsLocal_(0),
     numRealAtomsTotal_(0),
@@ -123,7 +134,7 @@ void GridSet::setLocalAtomOrder()
 
 // TODO: Move to gridset.cpp
 void GridSet::putOnGrid(const matrix                    box,
-                        const int                       ddZone,
+                        const int                       gridIndex,
                         const rvec                      lowerCorner,
                         const rvec                      upperCorner,
                         const gmx::UpdateGroupsCog     *updateGroupsCog,
@@ -136,29 +147,29 @@ void GridSet::putOnGrid(const matrix                    box,
                         const int                      *move,
                         nbnxn_atomdata_t               *nbat)
 {
-    Nbnxm::Grid  &grid = grids_[ddZone];
+    Nbnxm::Grid  &grid = grids_[gridIndex];
 
     int           cellOffset;
-    if (ddZone == 0)
+    if (gridIndex == 0)
     {
         cellOffset = 0;
     }
     else
     {
-        const Nbnxm::Grid &previousGrid = grids_[ddZone - 1];
+        const Nbnxm::Grid &previousGrid = grids_[gridIndex - 1];
         cellOffset = previousGrid.atomIndexEnd()/previousGrid.geometry().numAtomsPerCell;
     }
 
     const int n = atomEnd - atomStart;
 
     real      maxAtomGroupRadius;
-    if (ddZone == 0)
+    if (gridIndex == 0)
     {
         copy_mat(box, box_);
 
         numRealAtomsLocal_ = atomEnd - numAtomsMoved;
         /* We assume that nbnxn_put_on_grid is called first
-         * for the local atoms (ddZone=0).
+         * for the local atoms (gridIndex=0).
          */
         numRealAtomsTotal_ = atomEnd - numAtomsMoved;
 
@@ -182,8 +193,9 @@ void GridSet::putOnGrid(const matrix                    box,
     /* We always use the home zone (grid[0]) for setting the cell size,
      * since determining densities for non-local zones is difficult.
      */
-    // grid data used in GPU transfers inherits the gridset pinnin policy
-    auto pinPolicy = gridSetData_.cells.get_allocator().pinningPolicy();
+    const int ddZone = (domainSetup_.doTestParticleInsertion ? 0 : gridIndex);
+    // grid data used in GPU transfers inherits the gridset pinning policy
+    auto      pinPolicy = gridSetData_.cells.get_allocator().pinningPolicy();
     grid.setDimensions(ddZone, n - numAtomsMoved,
                        lowerCorner, upperCorner,
                        atomDensity,
@@ -220,11 +232,11 @@ void GridSet::putOnGrid(const matrix                    box,
     grid.setCellIndices(ddZone, cellOffset, &gridSetData_, gridWork_,
                         atomStart, atomEnd, atomInfo.data(), x, numAtomsMoved, nbat);
 
-    if (ddZone == 0)
+    if (gridIndex == 0)
     {
         nbat->natoms_local = nbat->numAtoms();
     }
-    if (ddZone == gmx::ssize(grids_) - 1)
+    if (gridIndex == gmx::ssize(grids_) - 1)
     {
         /* We are done setting up all grids, we can resize the force buffers */
         nbat->resizeForceBuffers();
index 38d9650e4f1fc49afd4eb12de57f4f97f23a4180..142a1f28b4126b53a18e6638c59d5dc197369734 100644 (file)
@@ -73,6 +73,12 @@ namespace Nbnxm
 
 /*! \internal
  * \brief Holds a set of search grids for the local + non-local DD zones
+ *
+ * There are three different possible setups:
+ * - a single grid; this is the standard case without domain decomposition
+ * - one grid for each domain decomposition zone
+ * - with test particle insertion there are two grids: one for the system
+ *   to insert into and one for the molecule that is inserted
  */
 class GridSet
 {
@@ -84,11 +90,14 @@ class GridSet
         {
             //! Constructor, without DD \p numDDCells and \p ddZones should be nullptr
             DomainSetup(int                       ePBC,
+                        bool                      doTestParticleInsertion,
                         const ivec               *numDDCells,
                         const gmx_domdec_zones_t *ddZones);
 
             //! The type of PBC
             int                       ePBC;
+            //! Tells whether we are doing test-particle insertion
+            bool                      doTestParticleInsertion;
             //! Are there multiple domains?
             bool                      haveMultipleDomains;
             //! Are there multiple domains along each dimension?
@@ -99,6 +108,7 @@ class GridSet
 
         //! Constructs a grid set for 1 or multiple DD zones, when numDDCells!=nullptr
         GridSet(int                       ePBC,
+                bool                      doTestParticleInsertion,
                 const ivec               *numDDCells,
                 const gmx_domdec_zones_t *ddZones,
                 PairlistType              pairlistType,
@@ -106,9 +116,9 @@ class GridSet
                 int                       numThreads,
                 gmx::PinningPolicy        pinningPolicy);
 
-        //! Puts the atoms in \p ddZone on the grid and copies the coordinates to \p nbat
+        //! Puts the atoms on the grid with index \p gridIndex and copies the coordinates to \p nbat
         void putOnGrid(const matrix                    box,
-                       int                             ddZone,
+                       int                             gridIndex,
                        const rvec                      lowerCorner,
                        const rvec                      upperCorner,
                        const gmx::UpdateGroupsCog     *updateGroupsCog,
index 6715fef9c2c5cca5936fbd9d140159110c7896ac..e87e6363c5a09f786daab12d104d321ce2cc8553 100644 (file)
@@ -472,6 +472,7 @@ init_nb_verlet(const gmx::MDLogger     &mdlog,
 
     auto pairSearch =
         std::make_unique<PairSearch>(ir->ePBC,
+                                     EI_TPI(ir->eI),
                                      DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
                                      DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
                                      pairlistParams.pairlistType,
index 43ed81cd32137f1c8d9c3fa6ab33d6a6cb216c49..85bb36920722141eb4d891593c036262103862fc 100644 (file)
@@ -4041,6 +4041,11 @@ PairlistSet::constructPairlists(const Nbnxm::GridSet          &gridSet,
 
     for (int zi = 0; zi < nzi; zi++)
     {
+        /* With TPI we do grid 1, the inserted molecule, versus grid 0, the rest */
+        if (gridSet.domainSetup().doTestParticleInsertion)
+        {
+            zi = 1;
+        }
         const Grid &iGrid = gridSet.grids()[zi];
 
         int                 zj0;
index 021f6cb25a4f1044df4167de6198b058a0ddc267..2d425de5efa14fee9bf60fefeccb2cf6c703a9f9 100644 (file)
@@ -109,13 +109,14 @@ PairsearchWork::~PairsearchWork()
 }
 
 PairSearch::PairSearch(const int                 ePBC,
+                       const bool                doTestParticleInsertion,
                        const ivec               *numDDCells,
                        const gmx_domdec_zones_t *ddZones,
                        const PairlistType        pairlistType,
                        const bool                haveFep,
                        const int                 maxNumThreads,
                        gmx::PinningPolicy        pinningPolicy) :
-    gridSet_(ePBC, numDDCells, ddZones, pairlistType, haveFep, maxNumThreads, pinningPolicy),
+    gridSet_(ePBC, doTestParticleInsertion, numDDCells, ddZones, pairlistType, haveFep, maxNumThreads, pinningPolicy),
     work_(maxNumThreads)
 {
     cycleCounting_.recordCycles_ = (getenv("GMX_NBNXN_CYCLE") != nullptr);
index 10ab4d15f9bac7289f7e48361c28ff323d685ac1..7bf8f421a38200b2b23f5158984ac68d45cdb2b7 100644 (file)
@@ -207,6 +207,7 @@ class PairSearch
          * \param[in] maxNumThreads   The maximum number of threads used in the search
          */
         PairSearch(int                       ePBC,
+                   bool                      doTestParticleInsertion,
                    const ivec               *numDDCells,
                    const gmx_domdec_zones_t *zones,
                    PairlistType              pairlistType,
index 452a771616b084ef838a86b486ed57b9ce664dba..a7e7b0a9956764e74f2c3d1c5414b53323572e9c 100644 (file)
@@ -655,7 +655,8 @@ static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt)
 void checkAndUpdateHardwareOptions(const gmx::MDLogger &mdlog,
                                    gmx_hw_opt_t        *hw_opt,
                                    const bool           isSimulationMasterRank,
-                                   const int            nPmeRanks)
+                                   const int            nPmeRanks,
+                                   const t_inputrec    *inputrec)
 {
     /* Currently hw_opt only contains default settings or settings supplied
      * by the user on the command line.
@@ -688,6 +689,18 @@ void checkAndUpdateHardwareOptions(const gmx::MDLogger &mdlog,
                       "compiled without thread-MPI");
         }
     }
+
+    /* With thread-MPI we need to handle TPI and #OpenMP-threads=auto early,
+     * so we can parallelize using MPI only. The general check is done later.
+     */
+    if (GMX_THREAD_MPI && isSimulationMasterRank)
+    {
+        GMX_RELEASE_ASSERT(inputrec, "Expect a valid inputrec");
+        if (EI_TPI(inputrec->eI) && hw_opt->nthreads_omp == 0)
+        {
+            hw_opt->nthreads_omp = 1;
+        }
+    }
     /* With thread-MPI the master thread sets hw_opt->totNumThreadsIsAuto.
      * The other threads receive a partially processed hw_opt from the master
      * thread and should not set hw_opt->totNumThreadsIsAuto again.
@@ -801,8 +814,18 @@ void checkAndUpdateRequestedNumOpenmpThreads(gmx_hw_opt_t         *hw_opt,
                                              const gmx_multisim_t *ms,
                                              int                   numRanksOnThisNode,
                                              PmeRunMode            pmeRunMode,
-                                             const gmx_mtop_t     &mtop)
+                                             const gmx_mtop_t     &mtop,
+                                             const t_inputrec     &inputrec)
 {
+    if (EI_TPI(inputrec.eI))
+    {
+        if (hw_opt->nthreads_omp > 1)
+        {
+            gmx_fatal(FARGS, "You requested OpenMP parallelization, which is not supported with TPI.");
+        }
+        hw_opt->nthreads_omp = 1;
+    }
+
     if (GMX_THREAD_MPI)
     {
 
index 6d8f1c235465098936741402880385adbe0d473d..2bea6245a03d20a296b95f618cec1fb47c5b6c25 100644 (file)
@@ -104,11 +104,13 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo,
  *  \param[in, out] hw_opt                    Hardware-related and threading options
  *  \param[in]      isSimulationMasterRank
  *  \param[in]      nPmeRanks                 Number of PME ranks
+ *  \param[in]      inputrec                  The input record; should point to a valid object when \p isSimulationMasterRank = true
  *  */
 void checkAndUpdateHardwareOptions(const gmx::MDLogger &mdlog,
                                    gmx_hw_opt_t        *hw_opt,
                                    bool                 isSimulationMasterRank,
-                                   int                  nPmeRanks);
+                                   int                  nPmeRanks,
+                                   const t_inputrec    *inputrec);
 
 /*! \brief Check, and if necessary update, the number of OpenMP threads requested
  *
@@ -120,7 +122,8 @@ void checkAndUpdateRequestedNumOpenmpThreads(gmx_hw_opt_t         *hw_opt,
                                              const gmx_multisim_t *ms,
                                              int                   numRanksOnThisNode,
                                              PmeRunMode            pmeRunMode,
-                                             const gmx_mtop_t     &mtop);
+                                             const gmx_mtop_t     &mtop,
+                                             const t_inputrec     &inputrec);
 
 namespace gmx
 {
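
Together with the resourcedivision.cpp hunk above, the practical effect is
that a TPI run forces one OpenMP thread per rank and parallelizes over
(thread-)MPI ranks only; explicitly requesting more OpenMP threads is now a
fatal error. An illustrative invocation using standard mdrun options:

    gmx mdrun -s tpi.tpr -rerun traj.trr -ntmpi 4 -ntomp 1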
index d1b4c0f4adbcf8b8e621a55e5deb684dd8eaf663..da17cd990fe9944fc3c23812ca8e021199596bde 100644 (file)
@@ -83,13 +83,26 @@ gmx_add_gtest_executable(
     normalmodes.cpp
     rerun.cpp
     simple_mdrun.cpp
-    tpitest.cpp
     # pseudo-library for code for mdrun
     $<TARGET_OBJECTS:mdrun_objlib>
     )
 target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
 gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST)
 
+# TPI does not support OpenMP, so we need a separate test binary
+set(testname "MdrunTpiTests")
+set(exename "mdrun-tpi-test")
+
+gmx_add_gtest_executable(
+    ${exename}
+    # files with code for tests
+    tpitest.cpp
+    # pseudo-library for code for mdrun
+    $<TARGET_OBJECTS:mdrun_objlib>
+    )
+target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
+gmx_register_gtest_test(${testname} ${exename} INTEGRATION_TEST)
+
 # Tests that only make sense to run with multiple ranks and/or real
 # MPI are implemented here.
 set(testname "MdrunMpiTests")
index 1258a57f9d6a32580345c66f9aec23e1088df294..b080e4fbffcae79e0f0de10d1a19abe9515c426e 100644 (file)
@@ -127,6 +127,7 @@ TEST_P(TpiTest, ReproducesOutput)
         epsilon-r                = 1
         epsilon-rf               = 0
         vdw-type                 = cut-off
+        vdw-modifier             = none
         rvdw                     = 0.9
         Tcoupl                   = no
         tc-grps                  = System
@@ -139,8 +140,7 @@ TEST_P(TpiTest, ReproducesOutput)
     runTest();
 }
 
-// Should be re-enabled when TPI supports the Verlet scheme
-INSTANTIATE_TEST_CASE_P(DISABLED_Simple, TpiTest, ::testing::Values(1993, 2994));
+INSTANTIATE_TEST_CASE_P(Simple, TpiTest, ::testing::Values(1993, 2994));
 
 }  // namespace
 }  // namespace test