From cec56078a19426cec84ef14f3ce9597deab7fd74 Mon Sep 17 00:00:00 2001 From: Mark Abraham Date: Wed, 18 Aug 2021 08:04:59 +0000 Subject: [PATCH] Ensure restart with update groups always works --- docs/release-notes/2021/2021.3.rst | 10 ++++++++ src/gromacs/domdec/domdec.cpp | 39 ++++++++++++++++++++++++++++++ src/gromacs/domdec/domdec.h | 25 ++++++++++++++++++- src/gromacs/mdrun/runner.cpp | 13 ++++++++++ 4 files changed, 86 insertions(+), 1 deletion(-) diff --git a/docs/release-notes/2021/2021.3.rst b/docs/release-notes/2021/2021.3.rst index b9ff941e91..b132c4acef 100644 --- a/docs/release-notes/2021/2021.3.rst +++ b/docs/release-notes/2021/2021.3.rst @@ -44,6 +44,16 @@ https://gitlab.com/gromacs/gromacs/-/tree/master/python_packaging/sample_restrai :issue:`4078` and :issue:`4102` +Fixed multi-rank restarts from checkpoints written by single-rank simulations +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Currently a single-rank simulation never uses update groups; however, a +multi-rank run can do so. This fix ensures that the atoms within +update groups always start in the same periodic image, which was not +guaranteed if the checkpoint was written by a single-rank simulation. 
+ +:issue:`4016` + Fixes for ``gmx`` tools ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp index 8e27798962..d75345d592 100644 --- a/src/gromacs/domdec/domdec.cpp +++ b/src/gromacs/domdec/domdec.cpp @@ -3221,3 +3221,42 @@ void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces) } } } + +void putUpdateGroupAtomsInSamePeriodicImage(const gmx_domdec_t& dd, + const gmx_mtop_t& mtop, + const matrix box, + gmx::ArrayRef<gmx::RVec> positions) +{ + int atomOffset = 0; + for (const gmx_molblock_t& molblock : mtop.molblock) + { + const auto& updateGrouping = dd.comm->systemInfo.updateGroupingPerMoleculetype[molblock.type]; + + for (int mol = 0; mol < molblock.nmol; mol++) + { + for (int g = 0; g < updateGrouping.numBlocks(); g++) + { + const auto& block = updateGrouping.block(g); + const int atomBegin = atomOffset + block.begin(); + const int atomEnd = atomOffset + block.end(); + for (int a = atomBegin + 1; a < atomEnd; a++) + { + // Make sure that atoms in the same update group + // are in the same periodic image after restarts. + for (int d = DIM - 1; d >= 0; d--) + { + while (positions[a][d] - positions[atomBegin][d] > 0.5_real * box[d][d]) + { + positions[a] -= box[d]; + } + while (positions[a][d] - positions[atomBegin][d] < -0.5_real * box[d][d]) + { + positions[a] += box[d]; + } + } + } + } + atomOffset += updateGrouping.fullRange().end(); + } + } +} diff --git a/src/gromacs/domdec/domdec.h b/src/gromacs/domdec/domdec.h index 2b65e1989b..8939ac35e7 100644 --- a/src/gromacs/domdec/domdec.h +++ b/src/gromacs/domdec/domdec.h @@ -3,7 +3,7 @@ * * Copyright (c) 2005 - 2014, The GROMACS development team. * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team. 
- * Copyright (c) 2020, by the GROMACS development team, led by + * Copyright (c) 2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -345,4 +345,27 @@ void communicateGpuHaloCoordinates(const t_commrec& cr, */ void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces); +/*! \brief Wraps the \c positions so that atoms from the same + * update group share the same periodic image wrt \c box. + * + * When DD and update groups are in use, the simulation master rank + * should call this to ensure that, e.g. when restarting a simulation + * that did not use update groups, the coordinates satisfy the new + * requirements. + * + * This function can probably be removed when even single-rank + * simulations use domain decomposition, because then the choice of + * whether update groups are used is probably going to be the same + * regardless of the rank count. + * + * \param[in] dd The DD manager + * \param[in] mtop The system topology + * \param[in] box The global system box + * \param[in,out] positions The global system positions, wrapped in place + */ +void putUpdateGroupAtomsInSamePeriodicImage(const gmx_domdec_t& dd, + const gmx_mtop_t& mtop, + const matrix box, + gmx::ArrayRef<gmx::RVec> positions); + #endif diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 2a5a7c2fe8..8d19c598c2 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1232,6 +1232,19 @@ int Mdrunner::mdrunner() ddBuilder.reset(nullptr); // Note that local state still does not exist yet. } + // Ensure that all atoms within the same update group are in the + // same periodic image. Otherwise, a simulation that did not use + // update groups (e.g. a single-rank simulation) cannot always be + // correctly restarted in a way that does use update groups + // (e.g. 
a multi-rank simulation). + if (isSimulationMasterRank) + { + const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false; + if (useUpdateGroups) + { + putUpdateGroupAtomsInSamePeriodicImage(*cr->dd, mtop, globalState->box, globalState->x); + } + } // The GPU update is decided here because we need to know whether the constraints or // SETTLEs can span accross the domain borders (i.e. whether or not update groups are -- 2.22.0