From 39566cbfcc728d30750c534d1f12917ff35aafc3 Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Tue, 10 Sep 2019 11:01:19 +0200 Subject: [PATCH] Enable TPI with the Verlet cut-off scheme This requires a special treatment for the nbnxm gridding and search, with one grid for the system to insert in and one grid for the molecule to insert. Also reenabled the TPI tests. Change-Id: Ia3888daf4b06a94b0a7a03ec17b7bbe27084ac3d --- docs/user-guide/mdp-options.rst | 10 +- src/gromacs/gmxpreprocess/readir.cpp | 10 +- src/gromacs/mdlib/forcerec.cpp | 11 +- src/gromacs/mdlib/sim_util.cpp | 19 +- src/gromacs/mdrun/runner.cpp | 58 ++++-- src/gromacs/mdrun/tpi.cpp | 194 +++++++++++++----- src/gromacs/nbnxm/grid.cpp | 32 ++- src/gromacs/nbnxm/grid.h | 4 +- src/gromacs/nbnxm/gridset.cpp | 50 +++-- src/gromacs/nbnxm/gridset.h | 14 +- src/gromacs/nbnxm/nbnxm_setup.cpp | 1 + src/gromacs/nbnxm/pairlist.cpp | 5 + src/gromacs/nbnxm/pairsearch.cpp | 3 +- src/gromacs/nbnxm/pairsearch.h | 1 + .../taskassignment/resourcedivision.cpp | 27 ++- src/gromacs/taskassignment/resourcedivision.h | 7 +- src/programs/mdrun/tests/CMakeLists.txt | 15 +- src/programs/mdrun/tests/tpitest.cpp | 4 +- 18 files changed, 336 insertions(+), 129 deletions(-) diff --git a/docs/user-guide/mdp-options.rst b/docs/user-guide/mdp-options.rst index 711c877128..1f76fd1972 100644 --- a/docs/user-guide/mdp-options.rst +++ b/docs/user-guide/mdp-options.rst @@ -563,10 +563,12 @@ Neighbor searching .. mdp:: rlist (1) [nm] - Cut-off distance for the short-range neighbor list. With the - :mdp-value:`cutoff-scheme=Verlet` :mdp:`cutoff-scheme`, this is by default set by the - :mdp:`verlet-buffer-tolerance` option and the value of - :mdp:`rlist` is ignored. + Cut-off distance for the short-range neighbor list. With dynamics, + this is by default set by the :mdp:`verlet-buffer-tolerance` option + and the value of :mdp:`rlist` is ignored. Without dynamics, this + is by default set to the maximum cut-off plus 5% buffer, except + for test particle insertion, where the buffer is managed exactly + and automatically. 
Electrostatics diff --git a/src/gromacs/gmxpreprocess/readir.cpp b/src/gromacs/gmxpreprocess/readir.cpp index 8f92c0cacd..c0623751e8 100644 --- a/src/gromacs/gmxpreprocess/readir.cpp +++ b/src/gromacs/gmxpreprocess/readir.cpp @@ -368,7 +368,13 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, rc_max = std::max(ir->rvdw, ir->rcoulomb); - if (ir->verletbuf_tol <= 0) + if (EI_TPI(ir->eI)) + { + /* With TPI we set the pairlist cut-off later using the radius of the inserted molecule */ + ir->verletbuf_tol = 0; + ir->rlist = rc_max; + } + else if (ir->verletbuf_tol <= 0) { if (ir->verletbuf_tol == 0) { @@ -546,8 +552,6 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, CHECK(ir->nstlist <= 0); sprintf(err_buf, "TPI does not work with full electrostatics other than PME"); CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype)); - sprintf(err_buf, "TPI does not work (yet) with the Verlet cut-off scheme"); - CHECK(ir->cutoff_scheme == ecutsVERLET); } /* SHAKE / LINCS */ diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index 8230129f2b..0b478cf110 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -1475,7 +1475,6 @@ void init_forcerec(FILE *fp, real rtab; char *env; double dbl; - const t_block *cgs; gmx_bool bGenericKernelOnly; gmx_bool bFEP_NonBonded; @@ -1491,16 +1490,8 @@ void init_forcerec(FILE *fp, if (EI_TPI(ir->eI)) { /* Set to the size of the molecule to be inserted (the last one) */ - /* Because of old style topologies, we have to use the last cg - * instead of the last molecule type. - */ - cgs = &mtop->moltype[mtop->molblock.back().type].cgs; - fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1]; gmx::RangePartitioning molecules = gmx_mtop_molecules(*mtop); - if (fr->n_tpi != molecules.block(molecules.numBlocks() - 1).size()) - { - gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group."); - } + fr->n_tpi = molecules.block(molecules.numBlocks() - 1).size(); } else { diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 768ca45777..945165cdc9 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -1118,7 +1118,7 @@ void do_force(FILE *fplog, nbv->atomdata_init_add_nbat_f_to_f_gpu(); } } - else + else if (!EI_TPI(inputrec->eI)) { if (useGpuXBufOps == BufferOpsUseGpu::True) { @@ -1376,13 +1376,16 @@ void do_force(FILE *fplog, step, nrnb, wcycle); } - /* Add all the non-bonded force to the normal force array. - * This can be split into a local and a non-local part when overlapping - * communication with calculation with domain decomposition. - */ - wallcycle_stop(wcycle, ewcFORCE); - nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force()); - wallcycle_start_nocount(wcycle, ewcFORCE); + if (forceFlags.computeForces) + { + /* Add all the non-bonded force to the normal force array. + * This can be split into a local and a non-local part when overlapping + * communication with calculation with domain decomposition.
+ */ + wallcycle_stop(wcycle, ewcFORCE); + nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force()); + wallcycle_start_nocount(wcycle, ewcFORCE); + } /* If there are multiple fshift output buffers we need to reduce them */ if (forceFlags.computeVirial) diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 64248d311d..5caa3caee5 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -394,24 +394,36 @@ namespace gmx * falling back to CPU code. With thread-MPI, only the first * call to this function should have \c issueWarning true. */ static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger &mdlog, - const t_inputrec *ir, + const t_inputrec &ir, bool issueWarning) { - if (ir->opts.ngener - ir->nwall > 1) + bool gpuIsUseful = true; + std::string warning; + + if (ir.opts.ngener - ir.nwall > 1) { /* The GPU code does not support more than one energy group. * If the user requested GPUs explicitly, a fatal error is given later. */ - if (issueWarning) - { - GMX_LOG(mdlog.warning).asParagraph() - .appendText("Multiple energy groups is not implemented for GPUs, falling back to the CPU. " - "For better performance, run on the GPU without energy groups and then do " - "gmx mdrun -rerun option on the trajectory with an energy group .tpr file."); - } - return false; + gpuIsUseful = false; + warning = + "Multiple energy groups is not implemented for GPUs, falling back to the CPU. " + "For better performance, run on the GPU without energy groups and then do " + "gmx mdrun -rerun option on the trajectory with an energy group .tpr file."; } - return true; + + if (EI_TPI(ir.eI)) + { + gpuIsUseful = false; + warning = "TPI is not implemented for GPUs."; + } + + if (!gpuIsUseful && issueWarning) + { + GMX_LOG(mdlog.warning).asParagraph().appendText(warning); + } + + return gpuIsUseful; } //! Initializes the logger for mdrun. 
@@ -605,8 +617,6 @@ int Mdrunner::mdrunner() /* CAUTION: threads may be started later on in this function, so cr doesn't reflect the final parallel state right now */ - t_inputrec inputrecInstance; - t_inputrec *inputrec = &inputrecInstance; gmx_mtop_t mtop; bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data()); @@ -659,6 +669,9 @@ int Mdrunner::mdrunner() // Print citation requests after all software/hardware printing pleaseCiteGromacs(fplog); + // TODO Replace this by unique_ptr once t_inputrec is C++ + t_inputrec inputrecInstance; + t_inputrec *inputrec = nullptr; std::unique_ptr<t_state> globalState; if (SIMMASTER(cr)) { @@ -667,11 +680,13 @@ globalState = std::make_unique<t_state>(); /* Read (nearly) all data required for the simulation */ - read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), inputrec, globalState.get(), &mtop); + read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), + &inputrecInstance, globalState.get(), &mtop); + inputrec = &inputrecInstance; } /* Check and update the hardware options for internal consistency */ - checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks); + checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks, inputrec); if (GMX_THREAD_MPI && SIMMASTER(cr)) { @@ -679,6 +694,8 @@ bool useGpuForPme = false; try { + GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy"); + // If the user specified the number of ranks, then we must // respect that, but in default mode, we need to allow for // the number of GPUs to choose the number of ranks. @@ -686,7 +703,7 @@ int Mdrunner::mdrunner() useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded, - gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI), + gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI), hw_opt.nthreads_tmpi); useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, @@ -723,8 +740,13 @@ int Mdrunner::mdrunner() if (PAR(cr)) { /* now broadcast everything to the non-master nodes/threads: */ + if (!SIMMASTER(cr)) + { + inputrec = &inputrecInstance; + } init_parallel(cr, inputrec, &mtop); } + GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now"); // Now each rank knows the inputrec that SIMMASTER read and used, // and (if applicable) cr->nnodes has been assigned the number of @@ -754,7 +776,7 @@ int Mdrunner::mdrunner() useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded, - gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI), + gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected); useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec, mtop, @@ -1044,7 +1066,7 @@ int Mdrunner::mdrunner() &hw_opt, hwinfo->nthreads_hw_avail, FALSE); /* Check and update the number of OpenMP threads requested */ checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_, - pmeRunMode, mtop); + pmeRunMode, mtop, *inputrec); gmx_omp_nthreads_init(mdlog, cr, hwinfo->nthreads_hw_avail, diff --git a/src/gromacs/mdrun/tpi.cpp b/src/gromacs/mdrun/tpi.cpp index 382a32be2f..3afadf161f 100644 ---
a/src/gromacs/mdrun/tpi.cpp +++ b/src/gromacs/mdrun/tpi.cpp @@ -70,6 +70,7 @@ #include "gromacs/mdlib/energyoutput.h" #include "gromacs/mdlib/force.h" #include "gromacs/mdlib/force_flags.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" #include "gromacs/mdlib/mdatoms.h" #include "gromacs/mdlib/tgroup.h" #include "gromacs/mdlib/update.h" @@ -82,6 +83,7 @@ #include "gromacs/mdtypes/md_enums.h" #include "gromacs/mdtypes/mdrunoptions.h" #include "gromacs/mdtypes/state.h" +#include "gromacs/nbnxm/nbnxm.h" #include "gromacs/pbcutil/pbc.h" #include "gromacs/random/threefry.h" #include "gromacs/random/uniformrealdistribution.h" @@ -129,6 +131,31 @@ static void realloc_bins(double **bin, int *nbin, int nbin_new) } } +//! Computes and returns the RF exclusion energy for the last molecule starting at \p beginAtom +static real +reactionFieldExclusionCorrection(gmx::ArrayRef<const gmx::RVec> x, + const t_mdatoms &mdatoms, + const interaction_const_t &ic, + const int beginAtom) +{ + real energy = 0; + + for (int i = beginAtom; i < mdatoms.homenr; i++) + { + const real qi = mdatoms.chargeA[i]; + energy -= 0.5*qi*qi*ic.c_rf; + + for (int j = i + 1; j < mdatoms.homenr; j++) + { + const real qj = mdatoms.chargeA[j]; + const real rsq = distance2(x[i], x[j]); + energy += qi*qj*(ic.k_rf*rsq - ic.c_rf); + } + } + + return ic.epsfac*energy; +} + namespace gmx { @@ -136,6 +163,8 @@ namespace gmx void LegacySimulator::do_tpi() { + GMX_RELEASE_ASSERT(gmx_omp_nthreads_get(emntDefault) == 1, "TPI does not support OpenMP"); + gmx_localtop_t top; PaddedVector<gmx::RVec> f {}; real lambda, t, temp, beta, drmax, epot; @@ -146,7 +175,7 @@ tensor force_vir, shake_vir, vir, pres; int a_tp0, a_tp1, ngid, gid_tp, nener, e; rvec *x_mol; - rvec mu_tot, x_init, dx, x_tp; + rvec mu_tot, x_init, dx; int nnodes, frame; int64_t frame_step_prev, frame_step; int64_t nsteps, stepblocksize = 0, step; @@ -177,11 +206,6 @@ real bU_bin_limit = 50; real bU_logV_bin_limit = bU_bin_limit + 10; - if (inputrec->cutoff_scheme == ecutsVERLET) - { - gmx_fatal(FARGS, "TPI does not work (yet) with the Verlet cut-off scheme"); - } - nnodes = cr->nnodes; gmx_mtop_generate_local_top(*top_global, &top, inputrec->efep != efepNO); @@ -279,13 +303,31 @@ fprintf(debug, "TPI atoms %d-%d\n", a_tp0, a_tp1); } - GMX_RELEASE_ASSERT(inputrec->rcoulomb <= inputrec->rlist && inputrec->rvdw <= inputrec->rlist, "Twin-range interactions are not supported with TPI"); + auto x = makeArrayRef(state_global->x); + + if (EEL_PME(fr->ic->eeltype)) + { + gmx_pme_reinit_atoms(fr->pmedata, a_tp0, nullptr); + } + + /* With reaction-field we have distance-dependent potentials + * between excluded atoms, so we need to add these separately + * for the inserted molecule.
+ */ + real rfExclusionEnergy = 0; + if (EEL_RF(fr->ic->eeltype)) + { + rfExclusionEnergy = reactionFieldExclusionCorrection(x, *mdatoms, *fr->ic, a_tp0); + if (debug) + { + fprintf(debug, "RF exclusion correction for inserted molecule: %f kJ/mol\n", rfExclusionEnergy); + } + } snew(x_mol, a_tp1-a_tp0); bDispCorr = (inputrec->eDispCorr != edispcNO); bCharge = FALSE; - auto x = makeArrayRef(state_global->x); for (i = a_tp0; i < a_tp1; i++) { /* Copy the coordinates of the molecule to be insterted */ @@ -296,12 +338,24 @@ LegacySimulator::do_tpi() } bRFExcl = (bCharge && EEL_RF(fr->ic->eeltype)); - // TODO: Calculate the center of geometry of the molecule to insert -#if 0 - calc_cgcm(fplog, cg_tp, cg_tp+1, &(top.cgs), state_global->x.rvec_array(), fr->cg_cm); + // Calculate the center of geometry of the molecule to insert + rvec cog = { 0, 0, 0 }; + for (int a = a_tp0; a < a_tp1; a++) + { + rvec_inc(cog, x[a]); + } + svmul(1.0_real/(a_tp1 - a_tp0), cog, cog); + real molRadius = 0; + for (int a = a_tp0; a < a_tp1; a++) + { + molRadius = std::max(molRadius, distance2(x[a], cog)); + } + molRadius = std::sqrt(molRadius); + + const real maxCutoff = std::max(inputrec->rvdw, inputrec->rcoulomb); if (bCavity) { - if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog) + if (norm(cog) > 0.5*maxCutoff && fplog) { fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n"); @@ -312,10 +366,9 @@ LegacySimulator::do_tpi() /* Center the molecule to be inserted at zero */ for (i = 0; i < a_tp1-a_tp0; i++) { - rvec_dec(x_mol[i], fr->cg_cm[cg_tp]); + rvec_dec(x_mol[i], cog); } } -#endif if (fplog) { @@ -330,6 +383,8 @@ LegacySimulator::do_tpi() { if (inputrec->nstlist > 1) { + + /* With the same pair list we insert in a sphere of radius rtpi in different orientations */ if (drmax == 0 && a_tp1-a_tp0 == 1) { gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax); @@ -348,12 +403,27 @@ LegacySimulator::do_tpi() } } + /* With the same pair list we insert in a sphere of radius rtpi + * in different orientations. We generate the pairlist with all + * inserted atoms located in the center of the sphere, so we need + * a buffer of size of the sphere and molecule radius. 
+ */ + inputrec->rlist = maxCutoff + 2*inputrec->rtpi + 2*molRadius; + fr->rlist = inputrec->rlist; + fr->nbv->changePairlistRadii(inputrec->rlist, inputrec->rlist); ngid = groups->groups[SimulationAtomGroupType::EnergyOutput].size(); - // TODO: Figure out which energy group to use -#if 0 - gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]); -#endif - gid_tp = 0; + gid_tp = GET_CGINFO_GID(fr->cginfo[a_tp0]); + for (int a = a_tp0 + 1; a < a_tp1; a++) + { + if (GET_CGINFO_GID(fr->cginfo[a]) != gid_tp) + { + fprintf(fplog, + "NOTE: Atoms in the molecule to insert belong to different energy groups.\n" + " Only contributions to the group of the first atom will be reported.\n"); + break; + } + } nener = 1 + ngid; if (bDispCorr) { @@ -503,13 +573,25 @@ copy_rvec(rerun_fr.x[i], x[i]); } copy_mat(rerun_fr.box, state_global->box); + const matrix &box = state_global->box; - V = det(state_global->box); + V = det(box); logV = log(V); bStateChanged = TRUE; bNS = TRUE; + put_atoms_in_box(fr->ePBC, box, x); + + /* Put all atoms except for the inserted ones on the grid */ + rvec vzero = { 0, 0, 0 }; + rvec boxDiagonal = { box[XX][XX], box[YY][YY], box[ZZ][ZZ] }; + nbnxn_put_on_grid(fr->nbv.get(), box, + 0, vzero, boxDiagonal, + nullptr, 0, a_tp0, -1, + fr->cginfo, x, + 0, nullptr); + step = cr->nodeid*stepblocksize; while (step < nsteps) { @@ -536,24 +618,6 @@ x_init[d] = dist(rng)*state_global->box[d][d]; } } - - if (inputrec->nstlist == 1) - { - copy_rvec(x_init, x_tp); - } - else - { - /* Generate coordinates within |dx|=drmax of x_init */ - do - { - for (d = 0; d < DIM; d++) - { - dx[d] = (2*dist(rng) - 1)*drmax; - } - } - while (norm2(dx) > drmax*drmax); - rvec_add(x_init, dx, x_tp); - } } else { @@ -587,6 +651,38 @@ } } } + } + + if (bNS) + { + for (int a = a_tp0; a < a_tp1; a++) + { + x[a] = x_init; + } + + /* Put the inserted molecule on its own search grid */ + nbnxn_put_on_grid(fr->nbv.get(), box, + 1, x_init, x_init, + nullptr, a_tp0, a_tp1, -1, + fr->cginfo, x, + 0, nullptr); + + /* TODO: Avoid updating all atoms at every bNS step */ + fr->nbv->setAtomProperties(*mdatoms, fr->cginfo); + + fr->nbv->constructPairlist(Nbnxm::InteractionLocality::Local, + &top.excls, step, nrnb); + + bNS = FALSE; + } + + /* Add random displacement uniformly distributed in a sphere + * of radius rtpi. We don't need to do this if we generate + * a new center location every step. + */ + rvec x_tp; + if (bCavity || inputrec->nstlist > 1) + { /* Generate coordinates within |dx|=drmax of x_init */ do { @@ -598,6 +694,10 @@ while (norm2(dx) > drmax*drmax); rvec_add(x_init, dx, x_tp); } + else + { + copy_rvec(x_init, x_tp); + } if (a_tp1 - a_tp0 == 1) { @@ -624,18 +724,16 @@ } } + /* Note: NonLocal refers to the inserted molecule */ + fr->nbv->setCoordinates(Nbnxm::AtomLocality::NonLocal, false, + x, BufferOpsUseGpu::False, nullptr); + /* Clear some matrix variables */ clear_mat(force_vir); clear_mat(shake_vir); clear_mat(vir); clear_mat(pres); - /* Set the center of geometry mass of the test molecule */ - // TODO: Compute and set the COG -#if 0 - copy_rvec(x_init, fr->cg_cm[top.cgs.nr-1]); -#endif - /* Calc energy (no forces) on new positions.
* Since we only need the intermolecular energy * and the RF exclusion terms of the inserted molecule occur @@ -659,7 +757,6 @@ LegacySimulator::do_tpi() state_global->lambda, nullptr, fr, mdScheduleWork, nullptr, mu_tot, t, nullptr, GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY | - (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS : 0) | (bStateChanged ? GMX_FORCE_STATECHANGED : 0), DDBalanceRegionHandler(nullptr)); std::feclearexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); @@ -667,7 +764,6 @@ LegacySimulator::do_tpi() cr->nnodes = nnodes; bStateChanged = FALSE; - bNS = FALSE; if (fr->dispersionCorrection) { @@ -684,6 +780,10 @@ LegacySimulator::do_tpi() { enerd->term[F_DISPCORR] = 0; } + if (EEL_RF(fr->ic->eeltype)) + { + enerd->term[F_EPOT] += rfExclusionEnergy; + } epot = enerd->term[F_EPOT]; bEnergyOutOfBounds = FALSE; @@ -750,7 +850,7 @@ LegacySimulator::do_tpi() } if (bRFExcl) { - sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU; + sum_UgembU[e++] += rfExclusionEnergy*embU; } if (EEL_FULL(fr->ic->eeltype)) { diff --git a/src/gromacs/nbnxm/grid.cpp b/src/gromacs/nbnxm/grid.cpp index a3c4b9d59c..54b10eaf28 100644 --- a/src/gromacs/nbnxm/grid.cpp +++ b/src/gromacs/nbnxm/grid.cpp @@ -104,13 +104,32 @@ static real gridAtomDensity(int numAtoms, void Grid::setDimensions(const int ddZone, const int numAtoms, - const rvec lowerCorner, - const rvec upperCorner, + gmx::RVec lowerCorner, + gmx::RVec upperCorner, real atomDensity, const real maxAtomGroupRadius, const bool haveFep, gmx::PinningPolicy pinningPolicy) { + /* We allow passing lowerCorner=upperCorner, in which case we need to + * create a finite sized bounding box to avoid division by zero. + * We use a minimum size such that the volume fits in float with some + * margin for computing and using the atom number density. + */ + constexpr real c_minimumGridSize = 1e-10; + for (int d = 0; d < DIM; d++) + { + GMX_ASSERT(upperCorner[d] >= lowerCorner[d], "Upper corner should be larger than the lower corner"); + if (upperCorner[d] - lowerCorner[d] < c_minimumGridSize) + { + /* Ensure we apply a correction to the bounding box */ + real correction = std::max(std::abs(lowerCorner[d])*GMX_REAL_EPS, + 0.5_real*c_minimumGridSize); + lowerCorner[d] -= correction; + upperCorner[d] += correction; + } + } + /* For the home zone we compute the density when not set (=-1) or when =0 */ if (ddZone == 0 && atomDensity <= 0) { @@ -151,7 +170,7 @@ void Grid::setDimensions(const int ddZone, tlen_y = tlen*c_gpuNumClusterPerCellY; } /* We round ncx and ncy down, because we get less cell pairs - * in the nbsist when the fixed cell dimensions (x,y) are + * in the pairlist when the fixed cell dimensions (x,y) are * larger than the variable one (z) than the other way around. */ dimensions_.numCells[XX] = std::max(1, static_cast(size[XX]/tlen_x)); @@ -1409,12 +1428,9 @@ void Grid::setCellIndices(int ddZone, /* Set the cell indices for the moved particles */ int n0 = numCellsTotal_*numAtomsPerCell; int n1 = numCellsTotal_*numAtomsPerCell + cxy_na_[numColumns()]; - if (ddZone == 0) + for (int i = n0; i < n1; i++) { - for (int i = n0; i < n1; i++) - { - cells[atomIndices[i]] = i; - } + cells[atomIndices[i]] = i; } } diff --git a/src/gromacs/nbnxm/grid.h b/src/gromacs/nbnxm/grid.h index 3da68f5772..4785f4a549 100644 --- a/src/gromacs/nbnxm/grid.h +++ b/src/gromacs/nbnxm/grid.h @@ -403,8 +403,8 @@ class Grid //! 
Sets the grid dimensions void setDimensions(int ddZone, int numAtoms, - const rvec lowerCorner, - const rvec upperCorner, + gmx::RVec lowerCorner, + gmx::RVec upperCorner, real atomDensity, real maxAtomGroupRadius, bool haveFep, diff --git a/src/gromacs/nbnxm/gridset.cpp b/src/gromacs/nbnxm/gridset.cpp index 34369acf94..28a88bbeb9 100644 --- a/src/gromacs/nbnxm/gridset.cpp +++ b/src/gromacs/nbnxm/gridset.cpp @@ -54,25 +54,35 @@ namespace Nbnxm { -//! Returns the number of DD zones -static int numDDZones(const std::array &haveMultipleDomainsPerDim) +//! Returns the number of search grids +static int numGrids(const GridSet::DomainSetup &domainSetup) { - int numDDZones = 1; - for (auto haveDD : haveMultipleDomainsPerDim) + int numGrids; + if (domainSetup.doTestParticleInsertion) { - if (haveDD) + numGrids = 2; + } + else + { + numGrids = 1; + for (auto haveDD : domainSetup.haveMultipleDomainsPerDim) { - numDDZones *= 2; + if (haveDD) + { + numGrids *= 2; + } } } - return numDDZones; + return numGrids; } GridSet::DomainSetup::DomainSetup(const int ePBC, + const bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones) : ePBC(ePBC), + doTestParticleInsertion(doTestParticleInsertion), haveMultipleDomains(numDDCells != nullptr), zones(ddZones) { @@ -83,14 +93,15 @@ GridSet::DomainSetup::DomainSetup(const int ePBC, } GridSet::GridSet(const int ePBC, + const bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones, const PairlistType pairlistType, const bool haveFep, const int numThreads, gmx::PinningPolicy pinningPolicy) : - domainSetup_(ePBC, numDDCells, ddZones), - grids_(numDDZones(domainSetup_.haveMultipleDomainsPerDim), Grid(pairlistType, haveFep_)), + domainSetup_(ePBC, doTestParticleInsertion, numDDCells, ddZones), + grids_(numGrids(domainSetup_), Grid(pairlistType, haveFep_)), haveFep_(haveFep), numRealAtomsLocal_(0), numRealAtomsTotal_(0), @@ -123,7 +134,7 @@ void GridSet::setLocalAtomOrder() // TODO: Move to gridset.cpp void GridSet::putOnGrid(const matrix box, - const int ddZone, + const int gridIndex, const rvec lowerCorner, const rvec upperCorner, const gmx::UpdateGroupsCog *updateGroupsCog, @@ -136,29 +147,29 @@ void GridSet::putOnGrid(const matrix box, const int *move, nbnxn_atomdata_t *nbat) { - Nbnxm::Grid &grid = grids_[ddZone]; + Nbnxm::Grid &grid = grids_[gridIndex]; int cellOffset; - if (ddZone == 0) + if (gridIndex == 0) { cellOffset = 0; } else { - const Nbnxm::Grid &previousGrid = grids_[ddZone - 1]; + const Nbnxm::Grid &previousGrid = grids_[gridIndex - 1]; cellOffset = previousGrid.atomIndexEnd()/previousGrid.geometry().numAtomsPerCell; } const int n = atomEnd - atomStart; real maxAtomGroupRadius; - if (ddZone == 0) + if (gridIndex == 0) { copy_mat(box, box_); numRealAtomsLocal_ = atomEnd - numAtomsMoved; /* We assume that nbnxn_put_on_grid is called first - * for the local atoms (ddZone=0). + * for the local atoms (gridIndex=0). */ numRealAtomsTotal_ = atomEnd - numAtomsMoved; @@ -182,8 +193,9 @@ void GridSet::putOnGrid(const matrix box, /* We always use the home zone (grid[0]) for setting the cell size, * since determining densities for non-local zones is difficult. */ - // grid data used in GPU transfers inherits the gridset pinnin policy - auto pinPolicy = gridSetData_.cells.get_allocator().pinningPolicy(); + const int ddZone = (domainSetup_.doTestParticleInsertion ? 
0 : gridIndex); + // grid data used in GPU transfers inherits the gridset pinning policy + auto pinPolicy = gridSetData_.cells.get_allocator().pinningPolicy(); grid.setDimensions(ddZone, n - numAtomsMoved, lowerCorner, upperCorner, atomDensity, @@ -220,11 +232,11 @@ grid.setCellIndices(ddZone, cellOffset, &gridSetData_, gridWork_, atomStart, atomEnd, atomInfo.data(), x, numAtomsMoved, nbat); - if (ddZone == 0) + if (gridIndex == 0) { nbat->natoms_local = nbat->numAtoms(); } - if (ddZone == gmx::ssize(grids_) - 1) + if (gridIndex == gmx::ssize(grids_) - 1) { /* We are done setting up all grids, we can resize the force buffers */ nbat->resizeForceBuffers(); diff --git a/src/gromacs/nbnxm/gridset.h b/src/gromacs/nbnxm/gridset.h index 38d9650e4f..142a1f28b4 100644 --- a/src/gromacs/nbnxm/gridset.h +++ b/src/gromacs/nbnxm/gridset.h @@ -73,6 +73,12 @@ namespace Nbnxm /*! \internal * \brief Holds a set of search grids for the local + non-local DD zones + * + * There are three different possible setups: + * - a single grid, this is the standard case without domain decomposition + * - one grid for each domain decomposition zone + * - with test particle insertion there are two grids, one for the system + * to insert in and one for the molecule that is inserted */ class GridSet { @@ -84,11 +90,14 @@ class GridSet { //! Constructor, without DD \p numDDCells and \p ddZones should be nullptr DomainSetup(int ePBC, + bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones); //! The type of PBC int ePBC; + //! Tells whether we are doing test-particle insertion + bool doTestParticleInsertion; //! Are there multiple domains? bool haveMultipleDomains; //! Are there multiple domains along each dimension? @@ -99,6 +108,7 @@ class GridSet //! Constructs a grid set for 1 or multiple DD zones, when numDDCells!=nullptr GridSet(int ePBC, + bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones, PairlistType pairlistType, @@ -106,9 +116,9 @@ class GridSet int numThreads, gmx::PinningPolicy pinningPolicy); - //! Puts the atoms in \p ddZone on the grid and copies the coordinates to \p nbat + //! Puts the atoms on the grid with index \p gridIndex and copies the coordinates to \p nbat void putOnGrid(const matrix box, - int ddZone, + int gridIndex, const rvec lowerCorner, const rvec upperCorner, const gmx::UpdateGroupsCog *updateGroupsCog, diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp index 6715fef9c2..e87e6363c5 100644 --- a/src/gromacs/nbnxm/nbnxm_setup.cpp +++ b/src/gromacs/nbnxm/nbnxm_setup.cpp @@ -472,6 +472,7 @@ init_nb_verlet(const gmx::MDLogger &mdlog, auto pairSearch = std::make_unique<PairSearch>(ir->ePBC, + EI_TPI(ir->eI), DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr, DOMAINDECOMP(cr) ?
domdec_zones(cr->dd) : nullptr, pairlistParams.pairlistType, diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp index 43ed81cd32..85bb369207 100644 --- a/src/gromacs/nbnxm/pairlist.cpp +++ b/src/gromacs/nbnxm/pairlist.cpp @@ -4041,6 +4041,11 @@ PairlistSet::constructPairlists(const Nbnxm::GridSet &gridSet, for (int zi = 0; zi < nzi; zi++) { + /* With TPI we do grid 1, the inserted molecule, versus grid 0, the rest */ + if (gridSet.domainSetup().doTestParticleInsertion) + { + zi = 1; + } const Grid &iGrid = gridSet.grids()[zi]; int zj0; diff --git a/src/gromacs/nbnxm/pairsearch.cpp b/src/gromacs/nbnxm/pairsearch.cpp index 021f6cb25a..2d425de5ef 100644 --- a/src/gromacs/nbnxm/pairsearch.cpp +++ b/src/gromacs/nbnxm/pairsearch.cpp @@ -109,13 +109,14 @@ PairsearchWork::~PairsearchWork() } PairSearch::PairSearch(const int ePBC, + const bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *ddZones, const PairlistType pairlistType, const bool haveFep, const int maxNumThreads, gmx::PinningPolicy pinningPolicy) : - gridSet_(ePBC, numDDCells, ddZones, pairlistType, haveFep, maxNumThreads, pinningPolicy), + gridSet_(ePBC, doTestParticleInsertion, numDDCells, ddZones, pairlistType, haveFep, maxNumThreads, pinningPolicy), work_(maxNumThreads) { cycleCounting_.recordCycles_ = (getenv("GMX_NBNXN_CYCLE") != nullptr); diff --git a/src/gromacs/nbnxm/pairsearch.h b/src/gromacs/nbnxm/pairsearch.h index 10ab4d15f9..7bf8f421a3 100644 --- a/src/gromacs/nbnxm/pairsearch.h +++ b/src/gromacs/nbnxm/pairsearch.h @@ -207,6 +207,7 @@ class PairSearch * \param[in] maxNumThreads The maximum number of threads used in the search */ PairSearch(int ePBC, + bool doTestParticleInsertion, const ivec *numDDCells, const gmx_domdec_zones_t *zones, PairlistType pairlistType, diff --git a/src/gromacs/taskassignment/resourcedivision.cpp b/src/gromacs/taskassignment/resourcedivision.cpp index 452a771616..a7e7b0a995 100644 --- a/src/gromacs/taskassignment/resourcedivision.cpp +++ b/src/gromacs/taskassignment/resourcedivision.cpp @@ -655,7 +655,8 @@ static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt) void checkAndUpdateHardwareOptions(const gmx::MDLogger &mdlog, gmx_hw_opt_t *hw_opt, const bool isSimulationMasterRank, - const int nPmeRanks) + const int nPmeRanks, + const t_inputrec *inputrec) { /* Currently hw_opt only contains default settings or settings supplied * by the user on the command line. @@ -688,6 +689,18 @@ void checkAndUpdateHardwareOptions(const gmx::MDLogger &mdlog, "compiled without thread-MPI"); } } + + /* With thread-MPI we need to handle TPI and #OpenMP-threads=auto early, + * so we can parallelize using MPI only. The general check is done later. + */ + if (GMX_THREAD_MPI && isSimulationMasterRank) + { + GMX_RELEASE_ASSERT(inputrec, "Expect a valid inputrec"); + if (EI_TPI(inputrec->eI) && hw_opt->nthreads_omp == 0) + { + hw_opt->nthreads_omp = 1; + } + } /* With thread-MPI the master thread sets hw_opt->totNumThreadsIsAuto. * The other threads receive a partially processed hw_opt from the master * thread and should not set hw_opt->totNumThreadsIsAuto again. 
@@ -801,8 +814,18 @@ void checkAndUpdateRequestedNumOpenmpThreads(gmx_hw_opt_t *hw_opt, const gmx_multisim_t *ms, int numRanksOnThisNode, PmeRunMode pmeRunMode, - const gmx_mtop_t &mtop) + const gmx_mtop_t &mtop, + const t_inputrec &inputrec) { + if (EI_TPI(inputrec.eI)) + { + if (hw_opt->nthreads_omp > 1) + { + gmx_fatal(FARGS, "You requested OpenMP parallelization, which is not supported with TPI."); + } + hw_opt->nthreads_omp = 1; + } + if (GMX_THREAD_MPI) { diff --git a/src/gromacs/taskassignment/resourcedivision.h b/src/gromacs/taskassignment/resourcedivision.h index 6d8f1c2354..2bea6245a0 100644 --- a/src/gromacs/taskassignment/resourcedivision.h +++ b/src/gromacs/taskassignment/resourcedivision.h @@ -104,11 +104,13 @@ void check_resource_division_efficiency(const gmx_hw_info_t *hwinfo, * \param[in, out] hw_opt Hardware-related and threading options * \param[in] isSimulationMasterRank * \param[in] nPmeRanks Number of PME ranks + * \param[in] inputrec The input record, should point to a valid object when \p isSimulationMasterRank = true * */ void checkAndUpdateHardwareOptions(const gmx::MDLogger &mdlog, gmx_hw_opt_t *hw_opt, bool isSimulationMasterRank, - int nPmeRanks); + int nPmeRanks, + const t_inputrec *inputrec); /*! \brief Check, and if necessary update, the number of OpenMP threads requested * @@ -120,7 +122,8 @@ void checkAndUpdateRequestedNumOpenmpThreads(gmx_hw_opt_t *hw_opt, const gmx_multisim_t *ms, int numRanksOnThisNode, PmeRunMode pmeRunMode, - const gmx_mtop_t &mtop); + const gmx_mtop_t &mtop, + const t_inputrec &inputrec); namespace gmx { diff --git a/src/programs/mdrun/tests/CMakeLists.txt b/src/programs/mdrun/tests/CMakeLists.txt index d1b4c0f4ad..da17cd990f 100644 --- a/src/programs/mdrun/tests/CMakeLists.txt +++ b/src/programs/mdrun/tests/CMakeLists.txt @@ -83,13 +83,26 @@ gmx_add_gtest_executable( normalmodes.cpp rerun.cpp simple_mdrun.cpp - tpitest.cpp # pseudo-library for code for mdrun $<TARGET_OBJECTS:mdrun_objlib> ) target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure) gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST) +# TPI does not support OpenMP, so we need a separate test binary +set(testname "MdrunTpiTests") +set(exename "mdrun-tpi-test") + +gmx_add_gtest_executable( + ${exename} + # files with code for tests + tpitest.cpp + # pseudo-library for code for mdrun + $<TARGET_OBJECTS:mdrun_objlib> + ) +target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure) +gmx_register_gtest_test(${testname} ${exename} INTEGRATION_TEST) + # Tests that only make sense to run with multiple ranks and/or real # MPI are implemented here. set(testname "MdrunMpiTests") diff --git a/src/programs/mdrun/tests/tpitest.cpp b/src/programs/mdrun/tests/tpitest.cpp index 1258a57f9d..b080e4fbff 100644 --- a/src/programs/mdrun/tests/tpitest.cpp +++ b/src/programs/mdrun/tests/tpitest.cpp @@ -127,6 +127,7 @@ TEST_P(TpiTest, ReproducesOutput) epsilon-r = 1 epsilon-rf = 0 vdw-type = cut-off + vdw-modifier = none rvdw = 0.9 Tcoupl = no tc-grps = System @@ -139,8 +140,7 @@ TEST_P(TpiTest, ReproducesOutput) runTest(); } -// Should be re-enabled when TPI supports the Verlet scheme -INSTANTIATE_TEST_CASE_P(DISABLED_Simple, TpiTest, ::testing::Values(1993, 2994)); +INSTANTIATE_TEST_CASE_P(Simple, TpiTest, ::testing::Values(1993, 2994)); } // namespace } // namespace test -- 2.22.0
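Reviewer note on the grid setup: the bookkeeping changed in gridset.cpp above can be summarized as follows: without domain decomposition there is a single search grid, with domain decomposition there is one grid per DD zone, and with TPI there are always exactly two grids, one holding the system to insert into and one holding the inserted molecule. The sketch below restates the numGrids() logic from this patch using only standard C++ types; the function and type names are illustrative and not part of the GROMACS API.

    #include <array>

    // Number of nbnxm search grids, mirroring numGrids() in gridset.cpp above:
    // TPI always uses two grids (system + inserted molecule); otherwise the
    // count doubles for every dimension that has more than one DD domain.
    static int numSearchGrids(bool doTestParticleInsertion,
                              const std::array<bool, 3> &haveMultipleDomainsPerDim)
    {
        if (doTestParticleInsertion)
        {
            return 2;
        }
        int numGrids = 1;
        for (bool haveDD : haveMultipleDomainsPerDim)
        {
            if (haveDD)
            {
                numGrids *= 2;
            }
        }
        return numGrids;
    }

The pairlist is then built as grid 1 (the inserted molecule) versus grid 0 (the rest), which is why pairlist.cpp starts the i-grid loop at zi = 1 in the TPI case.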
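Reviewer note on the pairlist radius: with TPI the pairlist is constructed with all inserted atoms placed at the center of the insertion sphere and is then reused for many trial positions and orientations, so tpi.cpp sets rlist = max(rvdw, rcoulomb) + 2*rtpi + 2*molRadius, where molRadius is the largest distance of any inserted atom from the molecule's center of geometry. A minimal standalone sketch of that arithmetic, assuming plain double-precision vectors (Vec3, moleculeRadius and tpiPairlistRadius are illustrative names, not GROMACS code):

    #include <algorithm>
    #include <array>
    #include <cmath>
    #include <vector>

    using Vec3 = std::array<double, 3>;

    // Largest distance of any atom of the inserted molecule from its center of geometry
    static double moleculeRadius(const std::vector<Vec3> &molecule)
    {
        Vec3 cog = { 0.0, 0.0, 0.0 };
        for (const Vec3 &atom : molecule)
        {
            for (int d = 0; d < 3; d++)
            {
                cog[d] += atom[d] / molecule.size();
            }
        }
        double maxDistanceSquared = 0.0;
        for (const Vec3 &atom : molecule)
        {
            double distanceSquared = 0.0;
            for (int d = 0; d < 3; d++)
            {
                distanceSquared += (atom[d] - cog[d]) * (atom[d] - cog[d]);
            }
            maxDistanceSquared = std::max(maxDistanceSquared, distanceSquared);
        }
        return std::sqrt(maxDistanceSquared);
    }

    // Pairlist radius for TPI, mirroring the formula used in tpi.cpp above:
    //   rlist = max(rvdw, rcoulomb) + 2*rtpi + 2*molRadius
    static double tpiPairlistRadius(double rvdw, double rcoulomb, double rtpi,
                                    const std::vector<Vec3> &molecule)
    {
        return std::max(rvdw, rcoulomb) + 2.0*rtpi + 2.0*moleculeRadius(molecule);
    }

Because this radius covers every insertion the list will be reused for, no Verlet buffer tolerance is needed, which is why check_ir() sets verletbuf_tol = 0 for TPI.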
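Reviewer note on the reaction-field correction: since the TPI pairlist only contains inserted-molecule-versus-system pairs, the intramolecular (excluded) pairs of the inserted molecule never reach the nonbonded kernels, yet with reaction field they still carry the distance-dependent term q_i q_j (k_rf r^2 - c_rf) plus a per-charge self term -1/2 q_i^2 c_rf, all scaled by the electrostatic conversion factor. The patch therefore adds this contribution analytically via reactionFieldExclusionCorrection() in tpi.cpp. A standalone restatement with plain types follows; the struct and function names are illustrative only, and like the patch it treats every pair within the inserted molecule as excluded.

    #include <array>
    #include <cstddef>
    #include <vector>

    struct ReactionFieldParams
    {
        double k_rf;    // quadratic reaction-field coefficient
        double c_rf;    // constant reaction-field shift
        double epsfac;  // electrostatic conversion factor
    };

    // Reaction-field energy of all intramolecular pairs of the inserted molecule
    // plus the per-charge self term, mirroring reactionFieldExclusionCorrection().
    static double rfExclusionEnergy(const std::vector<double>                &charges,
                                    const std::vector<std::array<double, 3>> &positions,
                                    const ReactionFieldParams                &rf)
    {
        double energy = 0.0;
        for (std::size_t i = 0; i < charges.size(); i++)
        {
            energy -= 0.5*charges[i]*charges[i]*rf.c_rf;
            for (std::size_t j = i + 1; j < charges.size(); j++)
            {
                double r2 = 0.0;
                for (int d = 0; d < 3; d++)
                {
                    r2 += (positions[i][d] - positions[j][d])*(positions[i][d] - positions[j][d]);
                }
                energy += charges[i]*charges[j]*(rf.k_rf*r2 - rf.c_rf);
            }
        }
        return rf.epsfac*energy;
    }

This energy is added to F_EPOT once per frame and replaces the old group-scheme F_RF_EXCL term in the sum_UgembU accumulation.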