Make pull with COM from previous step work with MPI
authorMagnus Lundborg <lundborg.magnus@gmail.com>
Fri, 23 Nov 2018 13:11:37 +0000 (14:11 +0100)
committerMark Abraham <mark.j.abraham@gmail.com>
Thu, 29 Nov 2018 09:36:00 +0000 (10:36 +0100)
There was no communication between the ranks, which caused
crashes with MPI and tMPI. This fixes that.
Minor clean-ups of pull with COM from previous step as well.
Fixes #2769

Change-Id: I3b321872ffd4b295c4e97029d8d54872b3674ac4

src/gromacs/domdec/collect.cpp
src/gromacs/fileio/checkpoint.cpp
src/gromacs/mdlib/md_support.cpp
src/gromacs/mdrun/md.cpp
src/gromacs/mdtypes/state.h
src/gromacs/pulling/pull.cpp
src/gromacs/pulling/pull.h
src/gromacs/pulling/pull_internal.h
src/gromacs/pulling/pullutil.cpp

index a857292b296ddd27d5012c5fb27e970b14fa6712..5b9c6d5c07dd55b6770b79c548416fbc9c4333a7 100644 (file)
@@ -301,7 +301,8 @@ void dd_collect_state(gmx_domdec_t *dd,
                 state->nhpres_vxi[i*nh+j]       = state_local->nhpres_vxi[i*nh+j];
             }
         }
-        state->baros_integral = state_local->baros_integral;
+        state->baros_integral      = state_local->baros_integral;
+        state->pull_com_prev_step  = state_local->pull_com_prev_step;
     }
     if (state_local->flags & (1 << estX))
     {
index 7eb8805389276d98a3a3ade887e2bd68b92e1e29..333342bc4319081a919c065e953d7f7b94e74a03 100644 (file)
@@ -1227,7 +1227,7 @@ static int do_cpt_state(XDR *xd,
                 case estDISRE_RM3TAV: ret         = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
                 case estORIRE_INITF:  ret         = do_cpte_real (xd, part, i, sflags, &state->hist.orire_initf, list); break;
                 case estORIRE_DTAV:   ret         = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
-                case estPREVSTEPCOM:  ret         = doVector<double>(xd, part, i, sflags, &state->com_prev_step, list); break;
+                case estPULLCOMPREVSTEP:      ret = doVector<double>(xd, part, i, sflags, &state->pull_com_prev_step, list); break;
                 default:
                     gmx_fatal(FARGS, "Unknown state entry %d\n"
                               "You are reading a checkpoint file written by different code, which is not supported", i);
index 1de02364faac63864af381efad3d07e325c46f0c..85fac9fa337f725876684c61f46b8c78f9630e23 100644 (file)
@@ -622,6 +622,6 @@ void set_state_entries(t_state *state, const t_inputrec *ir)
 
     if (ir->pull && ir->pull->bSetPbcRefToPrevStepCOM)
     {
-        state->flags |= (1<<estPREVSTEPCOM);
+        state->flags |= (1<<estPULLCOMPREVSTEP);
     }
 }
index f00d6d1256a32562c7352dace4ae3d33466c7676..ea12aac0e429746545af394c2bac8937042e55b8 100644 (file)
@@ -375,20 +375,6 @@ void gmx::Integrator::do_md()
                  * should not be reset.
                  */
             }
-            if (ir->pull && ir->pull->bSetPbcRefToPrevStepCOM)
-            {
-                /* Copy the pull group COM of the previous step from the checkpoint state to the pull state */
-                setPrevStepPullComFromState(ir->pull_work, state);
-            }
-        }
-        else if (ir->pull && ir->pull->bSetPbcRefToPrevStepCOM)
-        {
-            allocStatePrevStepPullCom(state, ir->pull_work);
-            t_pbc pbc;
-            set_pbc(&pbc, ir->ePBC, state->box);
-            initPullComFromPrevStep(cr, ir->pull_work, mdatoms, &pbc, as_rvec_array(state->x.data()));
-            updatePrevStepCom(ir->pull_work);
-            setStatePrevStepPullCom(ir->pull_work, state);
         }
         if (observablesHistory->energyHistory == nullptr)
         {
@@ -402,6 +388,8 @@ void gmx::Integrator::do_md()
         update_energyhistory(observablesHistory->energyHistory.get(), mdebin);
     }
 
+    preparePrevStepPullCom(ir, mdatoms, state, state_global, cr, startingFromCheckpoint);
+
     // TODO: Remove this by converting AWH into a ForceProvider
     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingFromCheckpoint,
                                 shellfc != nullptr,
@@ -1172,10 +1160,9 @@ void gmx::Integrator::do_md()
                       state, graph,
                       nrnb, wcycle, upd, constr);
 
-        if (MASTER(cr) && ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
+        if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
         {
-            updatePrevStepCom(ir->pull_work);
-            setStatePrevStepPullCom(ir->pull_work, state);
+            updatePrevStepPullCom(ir->pull_work, state);
         }
 
         if (ir->eI == eiVVAK)
index 6e240b32a081cb5de67e239694940cd080e1b227..1516daac57f390b6a9eb3ce6d77f82def066e41f 100644 (file)
@@ -94,7 +94,7 @@ enum {
     estORIRE_INITF, estORIRE_DTAV,
     estSVIR_PREV, estNH_VXI, estVETA, estVOL0, estNHPRES_XI, estNHPRES_VXI, estFVIR_PREV,
     estFEPSTATE, estMC_RNG_NOTSUPPORTED, estMC_RNGI_NOTSUPPORTED,
-    estBAROS_INT, estPREVSTEPCOM,
+    estBAROS_INT, estPULLCOMPREVSTEP,
     estNR
 };
 
@@ -217,15 +217,15 @@ class t_state
         ekinstate_t                ekinstate;      //!< The state of the kinetic energy
 
         /* History for special algorithms, should be moved to a history struct */
-        history_t                         hist;            //!< Time history for restraints
-        df_history_t                     *dfhist;          //!< Free-energy history for free energy analysis
-        std::shared_ptr<gmx::AwhHistory>  awhHistory;      //!< Accelerated weight histogram history
+        history_t                         hist;               //!< Time history for restraints
+        df_history_t                     *dfhist;             //!< Free-energy history for free energy analysis
+        std::shared_ptr<gmx::AwhHistory>  awhHistory;         //!< Accelerated weight histogram history
 
-        int                               ddp_count;       //!< The DD partitioning count for this state
-        int                               ddp_count_cg_gl; //!< The DD partitioning count for index_gl
-        std::vector<int>                  cg_gl;           //!< The global cg number of the local cgs
+        int                               ddp_count;          //!< The DD partitioning count for this state
+        int                               ddp_count_cg_gl;    //!< The DD partitioning count for index_gl
+        std::vector<int>                  cg_gl;              //!< The global cg number of the local cgs
 
-        std::vector<double>               com_prev_step;   //!< The COM of the previous step of each pull group
+        std::vector<double>               pull_com_prev_step; //!< The COM of the previous step of each pull group
 };
 
 #ifndef DOXYGEN
index fd60165a02c4969b2b397ffbbefaa102f6c6c064..f9ae38483a80b526a95aa065f69a861044e25bc2 100644 (file)
@@ -65,6 +65,7 @@
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/mdtypes/state.h"
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/topology/mtop_lookup.h"
 #include "gromacs/topology/topology.h"
@@ -1691,6 +1692,21 @@ void dd_make_local_pull_groups(const t_commrec *cr, struct pull_t *pull)
 
             comm->bParticipate = bWillParticipate;
             comm->nparticipate = count[0];
+
+            /* When we use the previous COM for PBC, we need to broadcast
+             * the previous COM to ranks that have joined the communicator.
+             */
+            for (pull_group_work_t &group : pull->group)
+            {
+                if (group.epgrppbc == epgrppbcPREVSTEPCOM)
+                {
+                    GMX_ASSERT(comm->bParticipate || !MASTER(cr),
+                               "The master rank has to participate, as it should pass an up to date prev. COM "
+                               "to bcast here as well as to e.g. checkpointing");
+
+                    gmx_bcast(sizeof(dvec), group.x_prev_step, cr);
+                }
+            }
         }
     }
 
@@ -1877,7 +1893,7 @@ init_pull(FILE *fplog, const pull_params_t *pull_params, const t_inputrec *ir,
         /* Set up the global to local atom mapping for PBC atoms */
         for (pull_group_work_t &group : pull->group)
         {
-            if (group.epgrppbc == epgrppbcREFAT)
+            if (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM)
             {
                 /* pbcAtomSet consists of a single atom */
                 group.pbcAtomSet = gmx::compat::make_unique<gmx::LocalAtomSet>(atomSets->add({&group.params.pbcatom, &group.params.pbcatom + 1}));
@@ -1893,7 +1909,6 @@ init_pull(FILE *fplog, const pull_params_t *pull_params, const t_inputrec *ir,
     pull->bFOutAverage = pull_params->bFOutAverage;
 
     GMX_RELEASE_ASSERT(pull->group[0].params.nat == 0, "pull group 0 is an absolute reference group and should not contain atoms");
-    pull->group[0].x_prev_step[XX] = NAN;
 
     pull->numCoordinatesWithExternalPotential = 0;
 
@@ -2160,8 +2175,6 @@ init_pull(FILE *fplog, const pull_params_t *pull_params, const t_inputrec *ir,
             init_pull_group_index(fplog, cr, g, pgrp,
                                   bConstraint, pulldim_con,
                                   mtop, ir, lambda);
-
-            pgrp->x_prev_step[XX] = NAN;
         }
         else
         {
@@ -2253,6 +2266,35 @@ static void destroy_pull(struct pull_t *pull)
     delete pull;
 }
 
+void preparePrevStepPullCom(const t_inputrec *ir, const t_mdatoms *md, t_state *state, const t_state *state_global, const t_commrec *cr, bool startingFromCheckpoint)
+{
+    if (!ir->pull || !ir->pull->bSetPbcRefToPrevStepCOM)
+    {
+        return;
+    }
+    allocStatePrevStepPullCom(state, ir->pull_work);
+    if (startingFromCheckpoint)
+    {
+        if (MASTER(cr))
+        {
+            state->pull_com_prev_step = state_global->pull_com_prev_step;
+        }
+        if (PAR(cr))
+        {
+            /* Only the master rank has the checkpointed COM from the previous step */
+            gmx_bcast(sizeof(double) * state->pull_com_prev_step.size(), &state->pull_com_prev_step[0], cr);
+        }
+        setPrevStepPullComFromState(ir->pull_work, state);
+    }
+    else
+    {
+        t_pbc pbc;
+        set_pbc(&pbc, ir->ePBC, state->box);
+        initPullComFromPrevStep(cr, ir->pull_work, md, &pbc, state->x.rvec_array());
+        updatePrevStepPullCom(ir->pull_work, state);
+    }
+}
+
 void finish_pull(struct pull_t *pull)
 {
     check_external_potential_registration(pull);
index 27f4848b9842c525b2d531f32ad13590d458d032..9fd108e3fb1aeb6ff4d63b29ff5b1ff2d9f1c744 100644 (file)
@@ -347,32 +347,25 @@ gmx_bool pull_have_constraint(const struct pull_t *pull);
 real max_pull_distance2(const pull_coord_work_t *pcrd,
                         const t_pbc             *pbc);
 
-/*! \brief Copies the COM from the previous step of all pull groups to the checkpoint state container
+/*! \brief Sets the previous step COM in pull to the current COM and updates the pull_com_prev_step in the state
  *
  * \param[in]   pull  The COM pull force calculation data structure
- * \param[in]   state The global state container
+ * \param[in]   state The local (to this rank) state.
  */
-void setStatePrevStepPullCom(const struct pull_t *pull, t_state *state);
+void updatePrevStepPullCom(struct pull_t *pull, t_state *state);
 
-/*! \brief Copies the pull group COM of the previous step from the checkpoint state to the pull state
+/*! \brief Allocates, initializes and communicates the previous step pull COM (if that option is set to true).
  *
- * \param[in]   pull  The COM pull force calculation data structure
- * \param[in]   state The global state container
- */
-void setPrevStepPullComFromState(struct pull_t *pull, const t_state *state);
-
-/*! \brief Sets the previous step COM to the current COM
+ * If ir->pull->bSetPbcRefToPrevStepCOM is not true nothing is done.
  *
- * \param[in]   pull The COM pull force calculation data structure
- */
-void updatePrevStepCom(struct pull_t *pull);
-
-/*! \brief Resizes the vector, in the state container, containing the COMs from the previous step
- *
- * \param[in]   state The global state container
- * \param[in]   pull  The COM pull force calculation data structure
+ * \param[in] ir                     The input options/settings of the simulation.
+ * \param[in] md                     All atoms.
+ * \param[in] state                  The local (to this rank) state.
+ * \param[in] state_global           The global state.
+ * \param[in] cr                     Struct for communication info.
+ * \param[in] startingFromCheckpoint Is the simulation starting from a checkpoint?
  */
-void allocStatePrevStepPullCom(t_state *state, pull_t *pull);
+void preparePrevStepPullCom(const t_inputrec *ir, const t_mdatoms *md, t_state *state, const t_state *state_global, const t_commrec *cr, bool startingFromCheckpoint);
 
 /*! \brief Initializes the COM of the previous step (set to initial COM)
  *
index f3edeb9254a9fbedb38ef3481e5482c278410576..9084b077149ad8546d64226b8b0f4226659e7eb7 100644 (file)
@@ -259,4 +259,19 @@ struct pull_t
     int                numExternalPotentialsStillToBeAppliedThisStep;
 };
 
+/*! \brief Copies the pull group COM of the previous step from the checkpoint state to the pull state
+ *
+ * \param[in]   pull  The COM pull force calculation data structure
+ * \param[in]   state The global state container
+ */
+void setPrevStepPullComFromState(struct pull_t *pull, const t_state *state);
+
+/*! \brief Resizes the vector, in the state container, containing the COMs from the previous step
+ *
+ * \param[in]   state The global state container
+ * \param[in]   pull  The COM pull force calculation data structure
+ */
+void allocStatePrevStepPullCom(t_state *state, pull_t *pull);
+
+
 #endif
index b9732e40eedbc6f364e8d00c37f6cdfcdea6c1f3..6d686f4689a08906a7d579e634f1c683291ace14 100644 (file)
@@ -990,29 +990,18 @@ bool pullCheckPbcWithinGroup(const pull_t                  &pull,
     return (pullGroupObeysPbcRestrictions(group, dimUsed, as_rvec_array(x.data()), pbc, pull.comm.pbcAtomBuffer[groupNr], pbcMargin));
 }
 
-void setStatePrevStepPullCom(const struct pull_t *pull, t_state *state)
-{
-    for (size_t i = 0; i < state->com_prev_step.size()/DIM; i++)
-    {
-        for (int j = 0; j < DIM; j++)
-        {
-            state->com_prev_step[i*DIM+j] = pull->group[i].x_prev_step[j];
-        }
-    }
-}
-
 void setPrevStepPullComFromState(struct pull_t *pull, const t_state *state)
 {
-    for (size_t i = 0; i < state->com_prev_step.size()/DIM; i++)
+    for (size_t g = 0; g < pull->group.size(); g++)
     {
         for (int j = 0; j < DIM; j++)
         {
-            pull->group[i].x_prev_step[j] = state->com_prev_step[i*DIM+j];
+            pull->group[g].x_prev_step[j] = state->pull_com_prev_step[g*DIM+j];
         }
     }
 }
 
-void updatePrevStepCom(struct pull_t *pull)
+void updatePrevStepPullCom(struct pull_t *pull, t_state *state)
 {
     for (size_t g = 0; g < pull->group.size(); g++)
     {
@@ -1020,7 +1009,8 @@ void updatePrevStepCom(struct pull_t *pull)
         {
             for (int j = 0; j < DIM; j++)
             {
-                pull->group[g].x_prev_step[j] = pull->group[g].x[j];
+                pull->group[g].x_prev_step[j]      = pull->group[g].x[j];
+                state->pull_com_prev_step[g*DIM+j] = pull->group[g].x[j];
             }
         }
     }
@@ -1030,13 +1020,13 @@ void allocStatePrevStepPullCom(t_state *state, pull_t *pull)
 {
     if (!pull)
     {
-        state->com_prev_step.clear();
+        state->pull_com_prev_step.clear();
         return;
     }
     size_t ngroup = pull->group.size();
-    if (state->com_prev_step.size()/DIM != ngroup)
+    if (state->pull_com_prev_step.size()/DIM != ngroup)
     {
-        state->com_prev_step.resize(ngroup * DIM, NAN);
+        state->pull_com_prev_step.resize(ngroup * DIM, NAN);
     }
 }
 
@@ -1049,8 +1039,16 @@ void initPullComFromPrevStep(const t_commrec *cr,
     pull_comm_t *comm   = &pull->comm;
     size_t       ngroup = pull->group.size();
 
-    comm->pbcAtomBuffer.resize(ngroup);
-    comm->comBuffer.resize(ngroup*DIM);
+    if (!comm->bParticipate)
+    {
+        return;
+    }
+
+    GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(), "pbcAtomBuffer should have size number of groups");
+    GMX_ASSERT(comm->comBuffer.size() == pull->group.size()*c_comBufferStride,
+               "comBuffer should have size #group*c_comBufferStride");
+
+    pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
 
     for (size_t g = 0; g < ngroup; g++)
     {
@@ -1064,7 +1062,6 @@ void initPullComFromPrevStep(const t_commrec *cr,
                        "use the COM from the previous step as reference.");
 
             rvec x_pbc = { 0, 0, 0 };
-            pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer);
             copy_rvec(comm->pbcAtomBuffer[g], x_pbc);
 
             if (debug)
@@ -1118,11 +1115,14 @@ void initPullComFromPrevStep(const t_commrec *cr,
             }
 
             /* Copy local sums to a buffer for global summing */
-            copy_dvec(comSumsTotal.sum_wmx,  comm->comBuffer[g*3]);
-            copy_dvec(comSumsTotal.sum_wmxp, comm->comBuffer[g*3 + 1]);
-            comm->comBuffer[g*3 + 2][0] = comSumsTotal.sum_wm;
-            comm->comBuffer[g*3 + 2][1] = comSumsTotal.sum_wwm;
-            comm->comBuffer[g*3 + 2][2] = 0;
+            auto localSums =
+                gmx::arrayRefFromArray(comm->comBuffer.data() + g*c_comBufferStride, c_comBufferStride);
+
+            localSums[0]    = comSumsTotal.sum_wmx;
+            localSums[1]    = comSumsTotal.sum_wmxp;
+            localSums[2][0] = comSumsTotal.sum_wm;
+            localSums[2][1] = comSumsTotal.sum_wwm;
+            localSums[2][2] = 0;
         }
     }
 
@@ -1137,11 +1137,13 @@ void initPullComFromPrevStep(const t_commrec *cr,
         {
             if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
             {
+                auto   localSums =
+                    gmx::arrayRefFromArray(comm->comBuffer.data() + g*c_comBufferStride, c_comBufferStride);
                 double wmass, wwmass;
 
                 /* Determine the inverse mass */
-                wmass             = comm->comBuffer[g*3+2][0];
-                wwmass            = comm->comBuffer[g*3+2][1];
+                wmass             = localSums[2][0];
+                wwmass            = localSums[2][1];
                 pgrp->mwscale     = 1.0/wmass;
                 /* invtm==0 signals a frozen group, so then we should keep it zero */
                 if (pgrp->invtm != 0)
@@ -1152,11 +1154,8 @@ void initPullComFromPrevStep(const t_commrec *cr,
                 /* Divide by the total mass */
                 for (int m = 0; m < DIM; m++)
                 {
-                    pgrp->x[m]    = comm->comBuffer[g*3  ][m]*pgrp->mwscale;
-                    if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM)
-                    {
-                        pgrp->x[m]     += comm->pbcAtomBuffer[g][m];
-                    }
+                    pgrp->x[m]  = localSums[0][m]*pgrp->mwscale;
+                    pgrp->x[m] += comm->pbcAtomBuffer[g][m];
                 }
                 if (debug)
                 {