From: Magnus Lundborg Date: Fri, 23 Nov 2018 13:11:37 +0000 (+0100) Subject: Make pull with COM from previous step work with MPI X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=707a94f615f0abe3523a4ed04836aa5b031a1b19;p=alexxy%2Fgromacs.git Make pull with COM from previous step work with MPI There was no communication between the ranks, which caused crashes with MPI and tMPI. This fixes that. Minor clean-ups of pull with COM from previous step as well. Fixes #2769 Change-Id: I3b321872ffd4b295c4e97029d8d54872b3674ac4 --- diff --git a/src/gromacs/domdec/collect.cpp b/src/gromacs/domdec/collect.cpp index a857292b29..5b9c6d5c07 100644 --- a/src/gromacs/domdec/collect.cpp +++ b/src/gromacs/domdec/collect.cpp @@ -301,7 +301,8 @@ void dd_collect_state(gmx_domdec_t *dd, state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j]; } } - state->baros_integral = state_local->baros_integral; + state->baros_integral = state_local->baros_integral; + state->pull_com_prev_step = state_local->pull_com_prev_step; } if (state_local->flags & (1 << estX)) { diff --git a/src/gromacs/fileio/checkpoint.cpp b/src/gromacs/fileio/checkpoint.cpp index 7eb8805389..333342bc43 100644 --- a/src/gromacs/fileio/checkpoint.cpp +++ b/src/gromacs/fileio/checkpoint.cpp @@ -1227,7 +1227,7 @@ static int do_cpt_state(XDR *xd, case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break; case estORIRE_INITF: ret = do_cpte_real (xd, part, i, sflags, &state->hist.orire_initf, list); break; case estORIRE_DTAV: ret = do_cpte_n_reals(xd, part, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break; - case estPREVSTEPCOM: ret = doVector(xd, part, i, sflags, &state->com_prev_step, list); break; + case estPULLCOMPREVSTEP: ret = doVector(xd, part, i, sflags, &state->pull_com_prev_step, list); break; default: gmx_fatal(FARGS, "Unknown state entry %d\n" "You are reading a checkpoint file written by different code, which is not supported", i); diff --git a/src/gromacs/mdlib/md_support.cpp b/src/gromacs/mdlib/md_support.cpp index 1de02364fa..85fac9fa33 100644 --- a/src/gromacs/mdlib/md_support.cpp +++ b/src/gromacs/mdlib/md_support.cpp @@ -622,6 +622,6 @@ void set_state_entries(t_state *state, const t_inputrec *ir) if (ir->pull && ir->pull->bSetPbcRefToPrevStepCOM) { - state->flags |= (1<flags |= (1<pull && ir->pull->bSetPbcRefToPrevStepCOM) - { - /* Copy the pull group COM of the previous step from the checkpoint state to the pull state */ - setPrevStepPullComFromState(ir->pull_work, state); - } - } - else if (ir->pull && ir->pull->bSetPbcRefToPrevStepCOM) - { - allocStatePrevStepPullCom(state, ir->pull_work); - t_pbc pbc; - set_pbc(&pbc, ir->ePBC, state->box); - initPullComFromPrevStep(cr, ir->pull_work, mdatoms, &pbc, as_rvec_array(state->x.data())); - updatePrevStepCom(ir->pull_work); - setStatePrevStepPullCom(ir->pull_work, state); } if (observablesHistory->energyHistory == nullptr) { @@ -402,6 +388,8 @@ void gmx::Integrator::do_md() update_energyhistory(observablesHistory->energyHistory.get(), mdebin); } + preparePrevStepPullCom(ir, mdatoms, state, state_global, cr, startingFromCheckpoint); + // TODO: Remove this by converting AWH into a ForceProvider auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingFromCheckpoint, shellfc != nullptr, @@ -1172,10 +1160,9 @@ void gmx::Integrator::do_md() state, graph, nrnb, wcycle, upd, constr); - if (MASTER(cr) && ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM) + if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM) { - updatePrevStepCom(ir->pull_work); - setStatePrevStepPullCom(ir->pull_work, state); + updatePrevStepPullCom(ir->pull_work, state); } if (ir->eI == eiVVAK) diff --git a/src/gromacs/mdtypes/state.h b/src/gromacs/mdtypes/state.h index 6e240b32a0..1516daac57 100644 --- a/src/gromacs/mdtypes/state.h +++ b/src/gromacs/mdtypes/state.h @@ -94,7 +94,7 @@ enum { estORIRE_INITF, estORIRE_DTAV, estSVIR_PREV, estNH_VXI, estVETA, estVOL0, estNHPRES_XI, estNHPRES_VXI, estFVIR_PREV, estFEPSTATE, estMC_RNG_NOTSUPPORTED, estMC_RNGI_NOTSUPPORTED, - estBAROS_INT, estPREVSTEPCOM, + estBAROS_INT, estPULLCOMPREVSTEP, estNR }; @@ -217,15 +217,15 @@ class t_state ekinstate_t ekinstate; //!< The state of the kinetic energy /* History for special algorithms, should be moved to a history struct */ - history_t hist; //!< Time history for restraints - df_history_t *dfhist; //!< Free-energy history for free energy analysis - std::shared_ptr awhHistory; //!< Accelerated weight histogram history + history_t hist; //!< Time history for restraints + df_history_t *dfhist; //!< Free-energy history for free energy analysis + std::shared_ptr awhHistory; //!< Accelerated weight histogram history - int ddp_count; //!< The DD partitioning count for this state - int ddp_count_cg_gl; //!< The DD partitioning count for index_gl - std::vector cg_gl; //!< The global cg number of the local cgs + int ddp_count; //!< The DD partitioning count for this state + int ddp_count_cg_gl; //!< The DD partitioning count for index_gl + std::vector cg_gl; //!< The global cg number of the local cgs - std::vector com_prev_step; //!< The COM of the previous step of each pull group + std::vector pull_com_prev_step; //!< The COM of the previous step of each pull group }; #ifndef DOXYGEN diff --git a/src/gromacs/pulling/pull.cpp b/src/gromacs/pulling/pull.cpp index fd60165a02..f9ae38483a 100644 --- a/src/gromacs/pulling/pull.cpp +++ b/src/gromacs/pulling/pull.cpp @@ -65,6 +65,7 @@ #include "gromacs/mdtypes/inputrec.h" #include "gromacs/mdtypes/md_enums.h" #include "gromacs/mdtypes/mdatom.h" +#include "gromacs/mdtypes/state.h" #include "gromacs/pbcutil/pbc.h" #include "gromacs/topology/mtop_lookup.h" #include "gromacs/topology/topology.h" @@ -1691,6 +1692,21 @@ void dd_make_local_pull_groups(const t_commrec *cr, struct pull_t *pull) comm->bParticipate = bWillParticipate; comm->nparticipate = count[0]; + + /* When we use the previous COM for PBC, we need to broadcast + * the previous COM to ranks that have joined the communicator. + */ + for (pull_group_work_t &group : pull->group) + { + if (group.epgrppbc == epgrppbcPREVSTEPCOM) + { + GMX_ASSERT(comm->bParticipate || !MASTER(cr), + "The master rank has to participate, as it should pass an up to date prev. COM " + "to bcast here as well as to e.g. checkpointing"); + + gmx_bcast(sizeof(dvec), group.x_prev_step, cr); + } + } } } @@ -1877,7 +1893,7 @@ init_pull(FILE *fplog, const pull_params_t *pull_params, const t_inputrec *ir, /* Set up the global to local atom mapping for PBC atoms */ for (pull_group_work_t &group : pull->group) { - if (group.epgrppbc == epgrppbcREFAT) + if (group.epgrppbc == epgrppbcREFAT || group.epgrppbc == epgrppbcPREVSTEPCOM) { /* pbcAtomSet consists of a single atom */ group.pbcAtomSet = gmx::compat::make_unique(atomSets->add({&group.params.pbcatom, &group.params.pbcatom + 1})); @@ -1893,7 +1909,6 @@ init_pull(FILE *fplog, const pull_params_t *pull_params, const t_inputrec *ir, pull->bFOutAverage = pull_params->bFOutAverage; GMX_RELEASE_ASSERT(pull->group[0].params.nat == 0, "pull group 0 is an absolute reference group and should not contain atoms"); - pull->group[0].x_prev_step[XX] = NAN; pull->numCoordinatesWithExternalPotential = 0; @@ -2160,8 +2175,6 @@ init_pull(FILE *fplog, const pull_params_t *pull_params, const t_inputrec *ir, init_pull_group_index(fplog, cr, g, pgrp, bConstraint, pulldim_con, mtop, ir, lambda); - - pgrp->x_prev_step[XX] = NAN; } else { @@ -2253,6 +2266,35 @@ static void destroy_pull(struct pull_t *pull) delete pull; } +void preparePrevStepPullCom(const t_inputrec *ir, const t_mdatoms *md, t_state *state, const t_state *state_global, const t_commrec *cr, bool startingFromCheckpoint) +{ + if (!ir->pull || !ir->pull->bSetPbcRefToPrevStepCOM) + { + return; + } + allocStatePrevStepPullCom(state, ir->pull_work); + if (startingFromCheckpoint) + { + if (MASTER(cr)) + { + state->pull_com_prev_step = state_global->pull_com_prev_step; + } + if (PAR(cr)) + { + /* Only the master rank has the checkpointed COM from the previous step */ + gmx_bcast(sizeof(double) * state->pull_com_prev_step.size(), &state->pull_com_prev_step[0], cr); + } + setPrevStepPullComFromState(ir->pull_work, state); + } + else + { + t_pbc pbc; + set_pbc(&pbc, ir->ePBC, state->box); + initPullComFromPrevStep(cr, ir->pull_work, md, &pbc, state->x.rvec_array()); + updatePrevStepPullCom(ir->pull_work, state); + } +} + void finish_pull(struct pull_t *pull) { check_external_potential_registration(pull); diff --git a/src/gromacs/pulling/pull.h b/src/gromacs/pulling/pull.h index 27f4848b98..9fd108e3fb 100644 --- a/src/gromacs/pulling/pull.h +++ b/src/gromacs/pulling/pull.h @@ -347,32 +347,25 @@ gmx_bool pull_have_constraint(const struct pull_t *pull); real max_pull_distance2(const pull_coord_work_t *pcrd, const t_pbc *pbc); -/*! \brief Copies the COM from the previous step of all pull groups to the checkpoint state container +/*! \brief Sets the previous step COM in pull to the current COM and updates the pull_com_prev_step in the state * * \param[in] pull The COM pull force calculation data structure - * \param[in] state The global state container + * \param[in] state The local (to this rank) state. */ -void setStatePrevStepPullCom(const struct pull_t *pull, t_state *state); +void updatePrevStepPullCom(struct pull_t *pull, t_state *state); -/*! \brief Copies the pull group COM of the previous step from the checkpoint state to the pull state +/*! \brief Allocates, initializes and communicates the previous step pull COM (if that option is set to true). * - * \param[in] pull The COM pull force calculation data structure - * \param[in] state The global state container - */ -void setPrevStepPullComFromState(struct pull_t *pull, const t_state *state); - -/*! \brief Sets the previous step COM to the current COM + * If ir->pull->bSetPbcRefToPrevStepCOM is not true nothing is done. * - * \param[in] pull The COM pull force calculation data structure - */ -void updatePrevStepCom(struct pull_t *pull); - -/*! \brief Resizes the vector, in the state container, containing the COMs from the previous step - * - * \param[in] state The global state container - * \param[in] pull The COM pull force calculation data structure + * \param[in] ir The input options/settings of the simulation. + * \param[in] md All atoms. + * \param[in] state The local (to this rank) state. + * \param[in] state_global The global state. + * \param[in] cr Struct for communication info. + * \param[in] startingFromCheckpoint Is the simulation starting from a checkpoint? */ -void allocStatePrevStepPullCom(t_state *state, pull_t *pull); +void preparePrevStepPullCom(const t_inputrec *ir, const t_mdatoms *md, t_state *state, const t_state *state_global, const t_commrec *cr, bool startingFromCheckpoint); /*! \brief Initializes the COM of the previous step (set to initial COM) * diff --git a/src/gromacs/pulling/pull_internal.h b/src/gromacs/pulling/pull_internal.h index f3edeb9254..9084b07714 100644 --- a/src/gromacs/pulling/pull_internal.h +++ b/src/gromacs/pulling/pull_internal.h @@ -259,4 +259,19 @@ struct pull_t int numExternalPotentialsStillToBeAppliedThisStep; }; +/*! \brief Copies the pull group COM of the previous step from the checkpoint state to the pull state + * + * \param[in] pull The COM pull force calculation data structure + * \param[in] state The global state container + */ +void setPrevStepPullComFromState(struct pull_t *pull, const t_state *state); + +/*! \brief Resizes the vector, in the state container, containing the COMs from the previous step + * + * \param[in] state The global state container + * \param[in] pull The COM pull force calculation data structure + */ +void allocStatePrevStepPullCom(t_state *state, pull_t *pull); + + #endif diff --git a/src/gromacs/pulling/pullutil.cpp b/src/gromacs/pulling/pullutil.cpp index b9732e40ee..6d686f4689 100644 --- a/src/gromacs/pulling/pullutil.cpp +++ b/src/gromacs/pulling/pullutil.cpp @@ -990,29 +990,18 @@ bool pullCheckPbcWithinGroup(const pull_t &pull, return (pullGroupObeysPbcRestrictions(group, dimUsed, as_rvec_array(x.data()), pbc, pull.comm.pbcAtomBuffer[groupNr], pbcMargin)); } -void setStatePrevStepPullCom(const struct pull_t *pull, t_state *state) -{ - for (size_t i = 0; i < state->com_prev_step.size()/DIM; i++) - { - for (int j = 0; j < DIM; j++) - { - state->com_prev_step[i*DIM+j] = pull->group[i].x_prev_step[j]; - } - } -} - void setPrevStepPullComFromState(struct pull_t *pull, const t_state *state) { - for (size_t i = 0; i < state->com_prev_step.size()/DIM; i++) + for (size_t g = 0; g < pull->group.size(); g++) { for (int j = 0; j < DIM; j++) { - pull->group[i].x_prev_step[j] = state->com_prev_step[i*DIM+j]; + pull->group[g].x_prev_step[j] = state->pull_com_prev_step[g*DIM+j]; } } } -void updatePrevStepCom(struct pull_t *pull) +void updatePrevStepPullCom(struct pull_t *pull, t_state *state) { for (size_t g = 0; g < pull->group.size(); g++) { @@ -1020,7 +1009,8 @@ void updatePrevStepCom(struct pull_t *pull) { for (int j = 0; j < DIM; j++) { - pull->group[g].x_prev_step[j] = pull->group[g].x[j]; + pull->group[g].x_prev_step[j] = pull->group[g].x[j]; + state->pull_com_prev_step[g*DIM+j] = pull->group[g].x[j]; } } } @@ -1030,13 +1020,13 @@ void allocStatePrevStepPullCom(t_state *state, pull_t *pull) { if (!pull) { - state->com_prev_step.clear(); + state->pull_com_prev_step.clear(); return; } size_t ngroup = pull->group.size(); - if (state->com_prev_step.size()/DIM != ngroup) + if (state->pull_com_prev_step.size()/DIM != ngroup) { - state->com_prev_step.resize(ngroup * DIM, NAN); + state->pull_com_prev_step.resize(ngroup * DIM, NAN); } } @@ -1049,8 +1039,16 @@ void initPullComFromPrevStep(const t_commrec *cr, pull_comm_t *comm = &pull->comm; size_t ngroup = pull->group.size(); - comm->pbcAtomBuffer.resize(ngroup); - comm->comBuffer.resize(ngroup*DIM); + if (!comm->bParticipate) + { + return; + } + + GMX_ASSERT(comm->pbcAtomBuffer.size() == pull->group.size(), "pbcAtomBuffer should have size number of groups"); + GMX_ASSERT(comm->comBuffer.size() == pull->group.size()*c_comBufferStride, + "comBuffer should have size #group*c_comBufferStride"); + + pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer); for (size_t g = 0; g < ngroup; g++) { @@ -1064,7 +1062,6 @@ void initPullComFromPrevStep(const t_commrec *cr, "use the COM from the previous step as reference."); rvec x_pbc = { 0, 0, 0 }; - pull_set_pbcatoms(cr, pull, x, comm->pbcAtomBuffer); copy_rvec(comm->pbcAtomBuffer[g], x_pbc); if (debug) @@ -1118,11 +1115,14 @@ void initPullComFromPrevStep(const t_commrec *cr, } /* Copy local sums to a buffer for global summing */ - copy_dvec(comSumsTotal.sum_wmx, comm->comBuffer[g*3]); - copy_dvec(comSumsTotal.sum_wmxp, comm->comBuffer[g*3 + 1]); - comm->comBuffer[g*3 + 2][0] = comSumsTotal.sum_wm; - comm->comBuffer[g*3 + 2][1] = comSumsTotal.sum_wwm; - comm->comBuffer[g*3 + 2][2] = 0; + auto localSums = + gmx::arrayRefFromArray(comm->comBuffer.data() + g*c_comBufferStride, c_comBufferStride); + + localSums[0] = comSumsTotal.sum_wmx; + localSums[1] = comSumsTotal.sum_wmxp; + localSums[2][0] = comSumsTotal.sum_wm; + localSums[2][1] = comSumsTotal.sum_wwm; + localSums[2][2] = 0; } } @@ -1137,11 +1137,13 @@ void initPullComFromPrevStep(const t_commrec *cr, { if (pgrp->epgrppbc == epgrppbcPREVSTEPCOM) { + auto localSums = + gmx::arrayRefFromArray(comm->comBuffer.data() + g*c_comBufferStride, c_comBufferStride); double wmass, wwmass; /* Determine the inverse mass */ - wmass = comm->comBuffer[g*3+2][0]; - wwmass = comm->comBuffer[g*3+2][1]; + wmass = localSums[2][0]; + wwmass = localSums[2][1]; pgrp->mwscale = 1.0/wmass; /* invtm==0 signals a frozen group, so then we should keep it zero */ if (pgrp->invtm != 0) @@ -1152,11 +1154,8 @@ void initPullComFromPrevStep(const t_commrec *cr, /* Divide by the total mass */ for (int m = 0; m < DIM; m++) { - pgrp->x[m] = comm->comBuffer[g*3 ][m]*pgrp->mwscale; - if (pgrp->epgrppbc == epgrppbcREFAT || pgrp->epgrppbc == epgrppbcPREVSTEPCOM) - { - pgrp->x[m] += comm->pbcAtomBuffer[g][m]; - } + pgrp->x[m] = localSums[0][m]*pgrp->mwscale; + pgrp->x[m] += comm->pbcAtomBuffer[g][m]; } if (debug) {