From 133c12ee12284879ed75c910de3887bbc3dab68f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Wed, 22 Jan 2020 17:07:01 +0100 Subject: [PATCH] Fix deadlock in PP-PME balancing startup delay The condition whether load balancing should start included a per-rank computed fixed time-delay. Therefore different ranks could evaluate this condition differently resulting in a deadlock. This change fixes the deadlock by broadcasting the result of the time delay check. Fixes #3335 Change-Id: I39ebc7e99483a6837bdbd79e312148384c1966b3 --- docs/release-notes/2020/2020.1.rst | 8 ++++++ src/gromacs/ewald/pme_load_balancing.cpp | 31 ++++++++++++++++++------ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/docs/release-notes/2020/2020.1.rst b/docs/release-notes/2020/2020.1.rst index 5617053e1d..f99a71ed90 100644 --- a/docs/release-notes/2020/2020.1.rst +++ b/docs/release-notes/2020/2020.1.rst @@ -21,6 +21,14 @@ Fix fatal error with mdrun -multidir with more than 1 rank per simulation :issue:`3296` +Fix deadlock in mdrun runs with multiple ranks and separate PME ranks +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +When multiple PP ranks as well as separate PME ranks are used, mdrun could +deadlock before starting the PP-PME balancing. + +:issue:`3335` + Avoid mdrun assertion failure when running with shells and update on a GPU """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" diff --git a/src/gromacs/ewald/pme_load_balancing.cpp b/src/gromacs/ewald/pme_load_balancing.cpp index e0431a76cd..4cd569105d 100644 --- a/src/gromacs/ewald/pme_load_balancing.cpp +++ b/src/gromacs/ewald/pme_load_balancing.cpp @@ -2,6 +2,7 @@ * This file is part of the GROMACS molecular simulation package. * * Copyright (c) 2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -160,6 +161,7 @@ struct pme_load_balancing_t gmx_bool bTriggerOnDLB; /**< trigger balancing only on DD DLB */ gmx_bool bBalance; /**< are we in the balancing phase, i.e. trying different setups? */ int nstage; /**< the current maximum number of stages */ + bool startupTimeDelayElapsed; /**< Has the c_startupTimeDelay elapsed indicating that the balancing can start. */ real cut_spacing; /**< the minimum cutoff / PME grid spacing ratio */ real rcut_vdw; /**< Vdw cutoff (does not change) */ @@ -180,9 +182,9 @@ struct pme_load_balancing_t int stage; /**< the current stage */ - int cycles_n; /**< step cycle counter cumulative count */ - double cycles_c; /**< step cycle counter cumulative cycles */ - double startTime; /**< time stamp when the balancing was started (relative to the UNIX epoch start).*/ + int cycles_n; /**< step cycle counter cumulative count */ + double cycles_c; /**< step cycle counter cumulative cycles */ + double startTime; /**< time stamp when the balancing was started on the master rank (relative to the UNIX epoch start).*/ }; /* TODO The code in this file should call this getter, rather than @@ -289,9 +291,13 @@ void pme_loadbal_init(pme_load_balancing_t** pme_lb_p, pme_lb->end = 0; pme_lb->elimited = epmelblimNO; - pme_lb->cycles_n = 0; - pme_lb->cycles_c = 0; - pme_lb->startTime = gmx_gettime(); + pme_lb->cycles_n = 0; + pme_lb->cycles_c = 0; + // only master ranks do timing + if (!PAR(cr) || (DOMAINDECOMP(cr) && DDMASTER(cr->dd))) + { + pme_lb->startTime = gmx_gettime(); + } if (!wallcycle_have_counter()) { @@ -921,11 +927,19 @@ void pme_loadbal_do(pme_load_balancing_t* pme_lb, * We also want to skip a number of steps and seconds while * the CPU and GPU, when used, performance stabilizes. */ + if (!PAR(cr) || (DOMAINDECOMP(cr) && DDMASTER(cr->dd))) + { + pme_lb->startupTimeDelayElapsed = (gmx_gettime() - pme_lb->startTime < c_startupTimeDelay); + } + if (DOMAINDECOMP(cr)) + { + dd_bcast(cr->dd, sizeof(bool), &pme_lb->startupTimeDelayElapsed); + } + if (pme_lb->cycles_n == 0 || step_rel < c_numFirstTuningIntervalSkip * ir.nstlist - || gmx_gettime() - pme_lb->startTime < c_startupTimeDelay) + || pme_lb->startupTimeDelayElapsed) { *bPrinting = FALSE; - return; } /* Sanity check, we expect nstlist cycle counts */ @@ -950,6 +964,7 @@ void pme_loadbal_do(pme_load_balancing_t* pme_lb, */ else if (step_rel >= c_numFirstTuningIntervalSkipWithSepPme * ir.nstlist) { + GMX_ASSERT(DOMAINDECOMP(cr), "Domain decomposition should be active here"); if (DDMASTER(cr->dd)) { /* If PME rank load is too high, start tuning. If -- 2.22.0