src/gromacs/mdrun/runner.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the MD runner routine calling all integrators.
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include "runner.h"
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cinttypes>
  52 #include <csignal>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57 #include <memory>
  58
  59 #include "gromacs/commandline/filenm.h"
  60 #include "gromacs/domdec/domdec.h"
  61 #include "gromacs/domdec/domdec_struct.h"
  62 #include "gromacs/domdec/gpuhaloexchange.h"
  63 #include "gromacs/domdec/localatomsetmanager.h"
  64 #include "gromacs/domdec/partition.h"
  65 #include "gromacs/ewald/ewald_utils.h"
  66 #include "gromacs/ewald/pme.h"
  67 #include "gromacs/ewald/pme_gpu_program.h"
  68 #include "gromacs/fileio/checkpoint.h"
  69 #include "gromacs/fileio/gmxfio.h"
  70 #include "gromacs/fileio/oenv.h"
  71 #include "gromacs/fileio/tpxio.h"
  72 #include "gromacs/gmxlib/network.h"
  73 #include "gromacs/gmxlib/nrnb.h"
  74 #include "gromacs/gpu_utils/gpu_utils.h"
  75 #include "gromacs/hardware/cpuinfo.h"
  76 #include "gromacs/hardware/detecthardware.h"
  77 #include "gromacs/hardware/printhardware.h"
  78 #include "gromacs/imd/imd.h"
  79 #include "gromacs/listed_forces/disre.h"
  80 #include "gromacs/listed_forces/gpubonded.h"
  81 #include "gromacs/listed_forces/orires.h"
  82 #include "gromacs/math/functions.h"
  83 #include "gromacs/math/utilities.h"
  84 #include "gromacs/math/vec.h"
  85 #include "gromacs/mdlib/boxdeformation.h"
  86 #include "gromacs/mdlib/broadcaststructs.h"
  87 #include "gromacs/mdlib/calc_verletbuf.h"
  88 #include "gromacs/mdlib/dispersioncorrection.h"
  89 #include "gromacs/mdlib/enerdata_utils.h"
  90 #include "gromacs/mdlib/force.h"
  91 #include "gromacs/mdlib/forcerec.h"
  92 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  93 #include "gromacs/mdlib/makeconstraints.h"
  94 #include "gromacs/mdlib/md_support.h"
  95 #include "gromacs/mdlib/mdatoms.h"
  96 #include "gromacs/mdlib/membed.h"
  97 #include "gromacs/mdlib/ppforceworkload.h"
  98 #include "gromacs/mdlib/qmmm.h"
  99 #include "gromacs/mdlib/sighandler.h"
 100 #include "gromacs/mdlib/stophandler.h"
 101 #include "gromacs/mdrun/mdmodules.h"
 102 #include "gromacs/mdrun/simulationcontext.h"
 103 #include "gromacs/mdrunutility/handlerestart.h"
 104 #include "gromacs/mdrunutility/logging.h"
 105 #include "gromacs/mdrunutility/multisim.h"
 106 #include "gromacs/mdrunutility/printtime.h"
 107 #include "gromacs/mdrunutility/threadaffinity.h"
 108 #include "gromacs/mdtypes/commrec.h"
 109 #include "gromacs/mdtypes/enerdata.h"
 110 #include "gromacs/mdtypes/fcdata.h"
 111 #include "gromacs/mdtypes/group.h"
 112 #include "gromacs/mdtypes/inputrec.h"
 113 #include "gromacs/mdtypes/md_enums.h"
 114 #include "gromacs/mdtypes/mdrunoptions.h"
 115 #include "gromacs/mdtypes/observableshistory.h"
 116 #include "gromacs/mdtypes/state.h"
 117 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 118 #include "gromacs/nbnxm/nbnxm.h"
 119 #include "gromacs/nbnxm/pairlist_tuning.h"
 120 #include "gromacs/pbcutil/pbc.h"
 121 #include "gromacs/pulling/output.h"
 122 #include "gromacs/pulling/pull.h"
 123 #include "gromacs/pulling/pull_rotation.h"
 124 #include "gromacs/restraint/manager.h"
 125 #include "gromacs/restraint/restraintmdmodule.h"
 126 #include "gromacs/restraint/restraintpotential.h"
 127 #include "gromacs/swap/swapcoords.h"
 128 #include "gromacs/taskassignment/decidegpuusage.h"
 129 #include "gromacs/taskassignment/resourcedivision.h"
 130 #include "gromacs/taskassignment/taskassignment.h"
 131 #include "gromacs/taskassignment/usergpuids.h"
 132 #include "gromacs/timing/gpu_timing.h"
 133 #include "gromacs/timing/wallcycle.h"
 134 #include "gromacs/timing/wallcyclereporting.h"
 135 #include "gromacs/topology/mtop_util.h"
 136 #include "gromacs/trajectory/trajectoryframe.h"
 137 #include "gromacs/utility/basenetwork.h"
 138 #include "gromacs/utility/cstringutil.h"
 139 #include "gromacs/utility/exceptions.h"
 140 #include "gromacs/utility/fatalerror.h"
 141 #include "gromacs/utility/filestream.h"
 142 #include "gromacs/utility/gmxassert.h"
 143 #include "gromacs/utility/gmxmpi.h"
 144 #include "gromacs/utility/keyvaluetree.h"
 145 #include "gromacs/utility/logger.h"
 146 #include "gromacs/utility/loggerbuilder.h"
 147 #include "gromacs/utility/mdmodulenotification.h"
 148 #include "gromacs/utility/physicalnodecommunicator.h"
 149 #include "gromacs/utility/pleasecite.h"
 150 #include "gromacs/utility/programcontext.h"
 151 #include "gromacs/utility/smalloc.h"
 152 #include "gromacs/utility/stringutil.h"
 153
 154 #include "isimulator.h"
 155 #include "replicaexchange.h"
 156 #include "simulatorbuilder.h"
 157
 158 #if GMX_FAHCORE
 159 #include "corewrap.h"
 160 #endif
 161
 162 namespace gmx
 163 {
 164
 165 /*! \brief environment variable to enable GPU P2P communication */
 166 static const bool c_enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr)
 167     && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
 168
 169 /*! \brief Manage any development feature flag variables encountered
 170  *
 171  * The use of dev features indicated by environment variables is
 172  * logged in order to ensure that runs with such featrues enabled can
 173  * be identified from their log and standard output. Any cross
 174  * dependencies are also checked, and if unsatisified, a fatal error
 175  * issued.
 176  *
 177  * \param[in]  mdlog        Logger object.
 178  */
 179 static void manageDevelopmentFeatures(const gmx::MDLogger &mdlog)
 180 {
 181     const bool enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 182     const bool useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
 183     const bool enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
 184
 185     if (enableGpuBufOps)
 186     {
 187         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 188                 "NOTE: This run uses the 'GPU buffer ops' feature, enabled by the GMX_USE_GPU_BUFFER_OPS environment variable.");
 189     }
 190
 191     if (enableGpuHaloExchange)
 192     {
 193         if (!enableGpuBufOps)
 194         {
 195             gmx_fatal(FARGS, "Cannot enable GPU halo exchange without GPU buffer operations, set GMX_USE_GPU_BUFFER_OPS=1\n");
 196         }
 197         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 198                 "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the GMX_GPU_DD_COMMS environment variable.");
 199     }
 200
 201     if (useGpuUpdateConstrain)
 202     {
 203         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 204                 "NOTE: This run uses the 'GPU update/constraints' feature, enabled by the GMX_UPDATE_CONSTRAIN_GPU environment variable.");
 205     }
 206
 207 }
 208
 209 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
 210  *
 211  * Used to ensure that the master thread does not modify mdrunner during copy
 212  * on the spawned threads. */
 213 static void threadMpiMdrunnerAccessBarrier()
 214 {
 215 #if GMX_THREAD_MPI
 216     MPI_Barrier(MPI_COMM_WORLD);
 217 #endif
 218 }
 219
 220 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 221 {
 222     auto newRunner = Mdrunner(std::make_unique<MDModules>());
 223
 224     // All runners in the same process share a restraint manager resource because it is
 225     // part of the interface to the client code, which is associated only with the
 226     // original thread. Handles to the same resources can be obtained by copy.
 227     {
 228         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
 229     }
 230
 231     // Copy original cr pointer before master thread can pass the thread barrier
 232     newRunner.cr  = reinitialize_commrec_for_this_thread(cr);
 233
 234     // Copy members of master runner.
 235     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 236     // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
 237     newRunner.hw_opt    = hw_opt;
 238     newRunner.filenames = filenames;
 239
 240     newRunner.oenv                = oenv;
 241     newRunner.mdrunOptions        = mdrunOptions;
 242     newRunner.domdecOptions       = domdecOptions;
 243     newRunner.nbpu_opt            = nbpu_opt;
 244     newRunner.pme_opt             = pme_opt;
 245     newRunner.pme_fft_opt         = pme_fft_opt;
 246     newRunner.bonded_opt          = bonded_opt;
 247     newRunner.nstlist_cmdline     = nstlist_cmdline;
 248     newRunner.replExParams        = replExParams;
 249     newRunner.pforce              = pforce;
 250     newRunner.ms                  = ms;
 251     newRunner.startingBehavior    = startingBehavior;
 252     newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 253
 254     threadMpiMdrunnerAccessBarrier();
 255
 256     GMX_RELEASE_ASSERT(!MASTER(newRunner.cr), "cloneOnSpawnedThread should only be called on spawned threads");
 257
 258     return newRunner;
 259 }
 260
 261 /*! \brief The callback used for running on spawned threads.
 262  *
 263  * Obtains the pointer to the master mdrunner object from the one
 264  * argument permitted to the thread-launch API call, copies it to make
 265  * a new runner for this thread, reinitializes necessary data, and
 266  * proceeds to the simulation. */
 267 static void mdrunner_start_fn(const void *arg)
 268 {
 269     try
 270     {
 271         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
 272         /* copy the arg list to make sure that it's thread-local. This
 273            doesn't copy pointed-to items, of course; fnm, cr and fplog
 274            are reset in the call below, all others should be const. */
 275         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
 276         mdrunner.mdrunner();
 277     }
 278     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 279 }
 280
 281
 282 /*! \brief Start thread-MPI threads.
 283  *
 284  * Called by mdrunner() to start a specific number of threads
 285  * (including the main thread) for thread-parallel runs. This in turn
 286  * calls mdrunner() for each thread. All options are the same as for
 287  * mdrunner(). */
 288 t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
 289 {
 290
 291     /* first check whether we even need to start tMPI */
 292     if (numThreadsToLaunch < 2)
 293     {
 294         return cr;
 295     }
 296
 297 #if GMX_THREAD_MPI
 298     /* now spawn new threads that start mdrunner_start_fn(), while
 299        the main thread returns. Thread affinity is handled later. */
 300     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
 301                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
 302     {
 303         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
 304     }
 305
 306     threadMpiMdrunnerAccessBarrier();
 307 #else
 308     GMX_UNUSED_VALUE(mdrunner_start_fn);
 309 #endif
 310
 311     return reinitialize_commrec_for_this_thread(cr);
 312 }
 313
 314 }  // namespace gmx
 315
 316 /*! \brief Initialize variables for Verlet scheme simulation */
 317 static void prepare_verlet_scheme(FILE                           *fplog,
 318                                   t_commrec                      *cr,
 319                                   t_inputrec                     *ir,
 320                                   int                             nstlist_cmdline,
 321                                   const gmx_mtop_t               *mtop,
 322                                   const matrix                    box,
 323                                   bool                            makeGpuPairList,
 324                                   const gmx::CpuInfo             &cpuinfo)
 325 {
 326     /* For NVE simulations, we will retain the initial list buffer */
 327     if (EI_DYNAMICS(ir->eI) &&
 328         ir->verletbuf_tol > 0 &&
 329         !(EI_MD(ir->eI) && ir->etc == etcNO))
 330     {
 331         /* Update the Verlet buffer size for the current run setup */
 332
 333         /* Here we assume SIMD-enabled kernels are being used. But as currently
 334          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 335          * and 4x2 gives a larger buffer than 4x4, this is ok.
 336          */
 337         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
 338         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 339
 340         const real         rlist_new =
 341             calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
 342
 343         if (rlist_new != ir->rlist)
 344         {
 345             if (fplog != nullptr)
 346             {
 347                 fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 348                         ir->rlist, rlist_new,
 349                         listSetup.cluster_size_i, listSetup.cluster_size_j);
 350             }
 351             ir->rlist     = rlist_new;
 352         }
 353     }
 354
 355     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 356     {
 357         gmx_fatal(FARGS, "Can not set nstlist without %s",
 358                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 359     }
 360
 361     if (EI_DYNAMICS(ir->eI))
 362     {
 363         /* Set or try nstlist values */
 364         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
 365     }
 366 }
 367
 368 /*! \brief Override the nslist value in inputrec
 369  *
 370  * with value passed on the command line (if any)
 371  */
 372 static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
 373                                     int64_t              nsteps_cmdline,
 374                                     t_inputrec          *ir)
 375 {
 376     assert(ir);
 377
 378     /* override with anything else than the default -2 */
 379     if (nsteps_cmdline > -2)
 380     {
 381         char sbuf_steps[STEPSTRSIZE];
 382         char sbuf_msg[STRLEN];
 383
 384         ir->nsteps = nsteps_cmdline;
 385         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
 386         {
 387             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
 388                     gmx_step_str(nsteps_cmdline, sbuf_steps),
 389                     fabs(nsteps_cmdline*ir->delta_t));
 390         }
 391         else
 392         {
 393             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
 394                     gmx_step_str(nsteps_cmdline, sbuf_steps));
 395         }
 396
 397         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
 398     }
 399     else if (nsteps_cmdline < -2)
 400     {
 401         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64,
 402                   nsteps_cmdline);
 403     }
 404     /* Do nothing if nsteps_cmdline == -2 */
 405 }
 406
 407 namespace gmx
 408 {
 409
 410 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
 411  *
 412  * If not, and if a warning may be issued, logs a warning about
 413  * falling back to CPU code. With thread-MPI, only the first
 414  * call to this function should have \c issueWarning true. */
 415 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
 416                                                const t_inputrec &ir,
 417                                                bool              issueWarning)
 418 {
 419     bool        gpuIsUseful = true;
 420     std::string warning;
 421
 422     if (ir.opts.ngener - ir.nwall > 1)
 423     {
 424         /* The GPU code does not support more than one energy group.
 425          * If the user requested GPUs explicitly, a fatal error is given later.
 426          */
 427         gpuIsUseful = false;
 428         warning     =
 429             "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
 430             "For better performance, run on the GPU without energy groups and then do "
 431             "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
 432     }
 433
 434     if (EI_TPI(ir.eI))
 435     {
 436         gpuIsUseful = false;
 437         warning     = "TPI is not implemented for GPUs.";
 438     }
 439
 440     if (!gpuIsUseful && issueWarning)
 441     {
 442         GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
 443     }
 444
 445     return gpuIsUseful;
 446 }
 447
 448 //! Initializes the logger for mdrun.
 449 static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr)
 450 {
 451     gmx::LoggerBuilder builder;
 452     if (fplog != nullptr)
 453     {
 454         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
 455     }
 456     if (cr == nullptr || SIMMASTER(cr))
 457     {
 458         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning,
 459                                 &gmx::TextOutputFile::standardError());
 460     }
 461     return builder.build();
 462 }
 463
 464 //! Make a TaskTarget from an mdrun argument string.
 465 static TaskTarget findTaskTarget(const char *optionString)
 466 {
 467     TaskTarget returnValue = TaskTarget::Auto;
 468
 469     if (strncmp(optionString, "auto", 3) == 0)
 470     {
 471         returnValue = TaskTarget::Auto;
 472     }
 473     else if (strncmp(optionString, "cpu", 3) == 0)
 474     {
 475         returnValue = TaskTarget::Cpu;
 476     }
 477     else if (strncmp(optionString, "gpu", 3) == 0)
 478     {
 479         returnValue = TaskTarget::Gpu;
 480     }
 481     else
 482     {
 483         GMX_ASSERT(false, "Option string should have been checked for sanity already");
 484     }
 485
 486     return returnValue;
 487 }
 488
 489 //! Finish run, aggregate data to print performance info.
 490 static void finish_run(FILE *fplog,
 491                        const gmx::MDLogger &mdlog,
 492                        const t_commrec *cr,
 493                        const t_inputrec *inputrec,
 494                        t_nrnb nrnb[], gmx_wallcycle_t wcycle,
 495                        gmx_walltime_accounting_t walltime_accounting,
 496                        nonbonded_verlet_t *nbv,
 497                        const gmx_pme_t *pme,
 498                        gmx_bool bWriteStat)
 499 {
 500     double  delta_t  = 0;
 501     double  nbfs     = 0, mflop = 0;
 502     double  elapsed_time,
 503             elapsed_time_over_all_ranks,
 504             elapsed_time_over_all_threads,
 505             elapsed_time_over_all_threads_over_all_ranks;
 506     /* Control whether it is valid to print a report. Only the
 507        simulation master may print, but it should not do so if the run
 508        terminated e.g. before a scheduled reset step. This is
 509        complicated by the fact that PME ranks are unaware of the
 510        reason why they were sent a pmerecvqxFINISH. To avoid
 511        communication deadlocks, we always do the communication for the
 512        report, even if we've decided not to write the report, because
 513        how long it takes to finish the run is not important when we've
 514        decided not to report on the simulation performance.
 515
 516        Further, we only report performance for dynamical integrators,
 517        because those are the only ones for which we plan to
 518        consider doing any optimizations. */
 519     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
 520
 521     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
 522     {
 523         GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
 524         printReport = false;
 525     }
 526
 527     t_nrnb                  *nrnb_tot;
 528     std::unique_ptr<t_nrnb>  nrnbTotalStorage;
 529     if (cr->nnodes > 1)
 530     {
 531         nrnbTotalStorage = std::make_unique<t_nrnb>();
 532         nrnb_tot         = nrnbTotalStorage.get();
 533 #if GMX_MPI
 534         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
 535                       cr->mpi_comm_mysim);
 536 #endif
 537     }
 538     else
 539     {
 540         nrnb_tot = nrnb;
 541     }
 542
 543     elapsed_time                  = walltime_accounting_get_time_since_reset(walltime_accounting);
 544     elapsed_time_over_all_threads = walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
 545     if (cr->nnodes > 1)
 546     {
 547 #if GMX_MPI
 548         /* reduce elapsed_time over all MPI ranks in the current simulation */
 549         MPI_Allreduce(&elapsed_time,
 550                       &elapsed_time_over_all_ranks,
 551                       1, MPI_DOUBLE, MPI_SUM,
 552                       cr->mpi_comm_mysim);
 553         elapsed_time_over_all_ranks /= cr->nnodes;
 554         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
 555          * current simulation. */
 556         MPI_Allreduce(&elapsed_time_over_all_threads,
 557                       &elapsed_time_over_all_threads_over_all_ranks,
 558                       1, MPI_DOUBLE, MPI_SUM,
 559                       cr->mpi_comm_mysim);
 560 #endif
 561     }
 562     else
 563     {
 564         elapsed_time_over_all_ranks                  = elapsed_time;
 565         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
 566     }
 567
 568     if (printReport)
 569     {
 570         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
 571     }
 572
 573     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
 574     {
 575         print_dd_statistics(cr, inputrec, fplog);
 576     }
 577
 578     /* TODO Move the responsibility for any scaling by thread counts
 579      * to the code that handled the thread region, so that there's a
 580      * mechanism to keep cycle counting working during the transition
 581      * to task parallelism. */
 582     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 583     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
 584     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
 585     auto cycle_sum(wallcycle_sum(cr, wcycle));
 586
 587     if (printReport)
 588     {
 589         auto                    nbnxn_gpu_timings = (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
 590         gmx_wallclock_gpu_pme_t pme_gpu_timings   = {};
 591
 592         if (pme_gpu_task_enabled(pme))
 593         {
 594             pme_gpu_get_timings(pme, &pme_gpu_timings);
 595         }
 596         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
 597                         elapsed_time_over_all_ranks,
 598                         wcycle, cycle_sum,
 599                         nbnxn_gpu_timings,
 600                         &pme_gpu_timings);
 601
 602         if (EI_DYNAMICS(inputrec->eI))
 603         {
 604             delta_t = inputrec->delta_t;
 605         }
 606
 607         if (fplog)
 608         {
 609             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
 610                        elapsed_time_over_all_ranks,
 611                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 612                        delta_t, nbfs, mflop);
 613         }
 614         if (bWriteStat)
 615         {
 616             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
 617                        elapsed_time_over_all_ranks,
 618                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 619                        delta_t, nbfs, mflop);
 620         }
 621     }
 622 }
 623
 624 int Mdrunner::mdrunner()
 625 {
 626     matrix                    box;
 627     t_forcerec               *fr               = nullptr;
 628     t_fcdata                 *fcd              = nullptr;
 629     real                      ewaldcoeff_q     = 0;
 630     real                      ewaldcoeff_lj    = 0;
 631     int                       nChargePerturbed = -1, nTypePerturbed = 0;
 632     gmx_wallcycle_t           wcycle;
 633     gmx_walltime_accounting_t walltime_accounting = nullptr;
 634     gmx_membed_t *            membed              = nullptr;
 635     gmx_hw_info_t            *hwinfo              = nullptr;
 636
 637     /* CAUTION: threads may be started later on in this function, so
 638        cr doesn't reflect the final parallel state right now */
 639     gmx_mtop_t                      mtop;
 640
 641     bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data());
 642     bool doRerun  = mdrunOptions.rerun;
 643
 644     // Handle task-assignment related user options.
 645     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
 646                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
 647
 648     std::vector<int> userGpuTaskAssignment;
 649     try
 650     {
 651         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
 652     }
 653     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 654     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
 655     auto       pmeTarget       = findTaskTarget(pme_opt);
 656     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
 657     auto       bondedTarget    = findTaskTarget(bonded_opt);
 658     PmeRunMode pmeRunMode      = PmeRunMode::None;
 659
 660     // Here we assume that SIMMASTER(cr) does not change even after the
 661     // threads are started.
 662
 663     FILE *fplog = nullptr;
 664     // If we are appending, we don't write log output because we need
 665     // to check that the old log file matches what the checkpoint file
 666     // expects. Otherwise, we should start to write log output now if
 667     // there is a file ready for it.
 668     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
 669     {
 670         fplog = gmx_fio_getfp(logFileHandle);
 671     }
 672     gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
 673     gmx::MDLogger    mdlog(logOwner.logger());
 674
 675     // report any development features that may be enabled by environment variables
 676     manageDevelopmentFeatures(mdlog);
 677
 678     // With thread-MPI, the communicator changes after threads are
 679     // launched, so this is rebuilt for the master rank at that
 680     // time. The non-master ranks are fine to keep the one made here.
 681     PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 682     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 683
 684     gmx_print_detected_hardware(fplog, isMasterSimMasterRank(ms, MASTER(cr)), mdlog, hwinfo);
 685
 686     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
 687
 688     // Print citation requests after all software/hardware printing
 689     pleaseCiteGromacs(fplog);
 690
 691     // TODO Replace this by unique_ptr once t_inputrec is C++
 692     t_inputrec               inputrecInstance;
 693     t_inputrec              *inputrec = nullptr;
 694     std::unique_ptr<t_state> globalState;
 695
 696     if (SIMMASTER(cr))
 697     {
 698         /* Only the master rank has the global state */
 699         globalState = std::make_unique<t_state>();
 700
 701         /* Read (nearly) all data required for the simulation */
 702         read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()),
 703                        &inputrecInstance, globalState.get(), &mtop);
 704         inputrec = &inputrecInstance;
 705     }
 706
 707     /* Check and update the hardware options for internal consistency */
 708     checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks, inputrec);
 709
 710     if (GMX_THREAD_MPI && SIMMASTER(cr))
 711     {
 712         bool useGpuForNonbonded = false;
 713         bool useGpuForPme       = false;
 714         try
 715         {
 716             GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
 717
 718             // If the user specified the number of ranks, then we must
 719             // respect that, but in default mode, we need to allow for
 720             // the number of GPUs to choose the number of ranks.
 721             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 722             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
 723                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 724                     canUseGpuForNonbonded,
 725                     gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
 726                     hw_opt.nthreads_tmpi);
 727             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
 728                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
 729                     *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 730
 731         }
 732         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 733
 734         /* Determine how many thread-MPI ranks to start.
 735          *
 736          * TODO Over-writing the user-supplied value here does
 737          * prevent any possible subsequent checks from working
 738          * correctly. */
 739         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
 740                                                 &hw_opt,
 741                                                 gpuIdsToUse,
 742                                                 useGpuForNonbonded,
 743                                                 useGpuForPme,
 744                                                 inputrec, &mtop,
 745                                                 mdlog,
 746                                                 doMembed);
 747
 748         // Now start the threads for thread MPI.
 749         cr = spawnThreads(hw_opt.nthreads_tmpi);
 750         /* The main thread continues here with a new cr. We don't deallocate
 751            the old cr because other threads may still be reading it. */
 752         // TODO Both master and spawned threads call dup_tfn and
 753         // reinitialize_commrec_for_this_thread. Find a way to express
 754         // this better.
 755         physicalNodeComm = PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 756     }
 757     // END OF CAUTION: cr and physicalNodeComm are now reliable
 758
 759     if (PAR(cr))
 760     {
 761         /* now broadcast everything to the non-master nodes/threads: */
 762         if (!SIMMASTER(cr))
 763         {
 764             inputrec = &inputrecInstance;
 765         }
 766         init_parallel(cr, inputrec, &mtop);
 767     }
 768     GMX_RELEASE_ASSERT(inputrec != nullptr, "All range should have a valid inputrec now");
 769
 770     // Now each rank knows the inputrec that SIMMASTER read and used,
 771     // and (if applicable) cr->nnodes has been assigned the number of
 772     // thread-MPI ranks that have been chosen. The ranks can now all
 773     // run the task-deciding functions and will agree on the result
 774     // without needing to communicate.
 775     //
 776     // TODO Should we do the communication in debug mode to support
 777     // having an assertion?
 778     //
 779     // Note that these variables describe only their own node.
 780     //
 781     // Note that when bonded interactions run on a GPU they always run
 782     // alongside a nonbonded task, so do not influence task assignment
 783     // even though they affect the force calculation workload.
 784     bool useGpuForNonbonded = false;
 785     bool useGpuForPme       = false;
 786     bool useGpuForBonded    = false;
 787     try
 788     {
 789         // It's possible that there are different numbers of GPUs on
 790         // different nodes, which is the user's responsibilty to
 791         // handle. If unsuitable, we will notice that during task
 792         // assignment.
 793         bool gpusWereDetected      = hwinfo->ngpu_compatible_tot > 0;
 794         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 795         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
 796                                                                 emulateGpuNonbonded,
 797                                                                 canUseGpuForNonbonded,
 798                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI),
 799                                                                 gpusWereDetected);
 800         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
 801                                                     *hwinfo, *inputrec, mtop,
 802                                                     cr->nnodes, domdecOptions.numPmeRanks,
 803                                                     gpusWereDetected);
 804         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
 805         useGpuForBonded =
 806             decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
 807                                             bondedTarget, canUseGpuForBonded,
 808                                             EVDW_PME(inputrec->vdwtype),
 809                                             EEL_PME_EWALD(inputrec->coulombtype),
 810                                             domdecOptions.numPmeRanks, gpusWereDetected);
 811
 812         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
 813         if (pmeRunMode == PmeRunMode::GPU)
 814         {
 815             if (pmeFftTarget == TaskTarget::Cpu)
 816             {
 817                 pmeRunMode = PmeRunMode::Mixed;
 818             }
 819         }
 820         else if (pmeFftTarget == TaskTarget::Gpu)
 821         {
 822             gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft.");
 823         }
 824     }
 825     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 826
 827     // Build restraints.
 828     // TODO: hide restraint implementation details from Mdrunner.
 829     // There is nothing unique about restraints at this point as far as the
 830     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
 831     // factory functions from the SimulationContext on which to call mdModules_->add().
 832     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
 833     for (auto && restraint : restraintManager_->getRestraints())
 834     {
 835         auto module = RestraintMDModule::create(restraint,
 836                                                 restraint->sites());
 837         mdModules_->add(std::move(module));
 838     }
 839
 840     // TODO: Error handling
 841     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 842     const auto &mdModulesNotifier = mdModules_->notifier().notifier_;
 843
 844     if (inputrec->internalParameters != nullptr)
 845     {
 846         mdModulesNotifier.notify(*inputrec->internalParameters);
 847     }
 848
 849     if (fplog != nullptr)
 850     {
 851         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 852         fprintf(fplog, "\n");
 853     }
 854
 855     if (SIMMASTER(cr))
 856     {
 857         /* In rerun, set velocities to zero if present */
 858         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
 859         {
 860             // rerun does not use velocities
 861             GMX_LOG(mdlog.info).asParagraph().appendText(
 862                     "Rerun trajectory contains velocities. Rerun does only evaluate "
 863                     "potential energy and forces. The velocities will be ignored.");
 864             for (int i = 0; i < globalState->natoms; i++)
 865             {
 866                 clear_rvec(globalState->v[i]);
 867             }
 868             globalState->flags &= ~(1 << estV);
 869         }
 870
 871         /* now make sure the state is initialized and propagated */
 872         set_state_entries(globalState.get(), inputrec);
 873     }
 874
 875     /* NM and TPI parallelize over force/energy calculations, not atoms,
 876      * so we need to initialize and broadcast the global state.
 877      */
 878     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
 879     {
 880         if (!MASTER(cr))
 881         {
 882             globalState = std::make_unique<t_state>();
 883         }
 884         broadcastStateWithoutDynamics(cr, globalState.get());
 885     }
 886
 887     /* A parallel command line option consistency check that we can
 888        only do after any threads have started. */
 889     if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 ||
 890                      domdecOptions.numCells[YY] > 1 ||
 891                      domdecOptions.numCells[ZZ] > 1 ||
 892                      domdecOptions.numPmeRanks > 0))
 893     {
 894         gmx_fatal(FARGS,
 895                   "The -dd or -npme option request a parallel simulation, "
 896 #if !GMX_MPI
 897                   "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
 898 #else
 899 #if GMX_THREAD_MPI
 900                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
 901 #else
 902                   "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
 903 #endif
 904 #endif
 905     }
 906
 907     if (doRerun &&
 908         (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 909     {
 910         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 911     }
 912
 913     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
 914     {
 915         if (domdecOptions.numPmeRanks > 0)
 916         {
 917             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
 918                                  "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
 919         }
 920
 921         domdecOptions.numPmeRanks = 0;
 922     }
 923
 924     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
 925     {
 926         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
 927          * improve performance with many threads per GPU, since our OpenMP
 928          * scaling is bad, but it's difficult to automate the setup.
 929          */
 930         domdecOptions.numPmeRanks = 0;
 931     }
 932     if (useGpuForPme)
 933     {
 934         if (domdecOptions.numPmeRanks < 0)
 935         {
 936             domdecOptions.numPmeRanks = 0;
 937             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
 938         }
 939         else
 940         {
 941             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
 942         }
 943     }
 944
 945 #if GMX_FAHCORE
 946     if (MASTER(cr))
 947     {
 948         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
 949     }
 950 #endif
 951
 952     /* NMR restraints must be initialized before load_checkpoint,
 953      * since with time averaging the history is added to t_state.
 954      * For proper consistency check we therefore need to extend
 955      * t_state here.
 956      * So the PME-only nodes (if present) will also initialize
 957      * the distance restraints.
 958      */
 959     snew(fcd, 1);
 960
 961     /* This needs to be called before read_checkpoint to extend the state */
 962     init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
 963
 964     init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
 965
 966     auto                 deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
 967
 968     ObservablesHistory   observablesHistory = {};
 969
 970     if (startingBehavior != StartingBehavior::NewSimulation)
 971     {
 972         /* Check if checkpoint file exists before doing continuation.
 973          * This way we can use identical input options for the first and subsequent runs...
 974          */
 975         if (mdrunOptions.numStepsCommandline > -2)
 976         {
 977             /* Temporarily set the number of steps to unmlimited to avoid
 978              * triggering the nsteps check in load_checkpoint().
 979              * This hack will go away soon when the -nsteps option is removed.
 980              */
 981             inputrec->nsteps = -1;
 982         }
 983
 984         load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
 985                         logFileHandle,
 986                         cr, domdecOptions.numCells,
 987                         inputrec, globalState.get(),
 988                         &observablesHistory,
 989                         mdrunOptions.reproducible, mdModules_->notifier());
 990
 991         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
 992         {
 993             // Now we can start normal logging to the truncated log file.
 994             fplog    = gmx_fio_getfp(logFileHandle);
 995             prepareLogAppending(fplog);
 996             logOwner = buildLogger(fplog, cr);
 997             mdlog    = logOwner.logger();
 998         }
 999     }
1000
1001     if (mdrunOptions.numStepsCommandline > -2)
1002     {
1003         GMX_LOG(mdlog.info).asParagraph().
1004             appendText("The -nsteps functionality is deprecated, and may be removed in a future version. "
1005                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field.");
1006     }
1007     /* override nsteps with value set on the commamdline */
1008     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
1009
1010     if (SIMMASTER(cr))
1011     {
1012         copy_mat(globalState->box, box);
1013     }
1014
1015     if (PAR(cr))
1016     {
1017         gmx_bcast(sizeof(box), box, cr);
1018     }
1019
1020     if (inputrec->cutoff_scheme != ecutsVERLET)
1021     {
1022         gmx_fatal(FARGS, "This group-scheme .tpr file can no longer be run by mdrun. Please update to the Verlet scheme, or use an earlier version of GROMACS if necessary.");
1023     }
1024     /* Update rlist and nstlist. */
1025     prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
1026                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
1027
1028     LocalAtomSetManager atomSets;
1029     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
1030                      inputrec->eI == eiNM))
1031     {
1032         cr->dd = init_domain_decomposition(mdlog, cr, domdecOptions, mdrunOptions,
1033                                            &mtop, inputrec,
1034                                            box, positionsFromStatePointer(globalState.get()),
1035                                            &atomSets);
1036         // Note that local state still does not exist yet.
1037     }
1038     else
1039     {
1040         /* PME, if used, is done on all nodes with 1D decomposition */
1041         cr->npmenodes = 0;
1042         cr->duty      = (DUTY_PP | DUTY_PME);
1043
1044         if (inputrec->ePBC == epbcSCREW)
1045         {
1046             gmx_fatal(FARGS,
1047                       "pbc=screw is only implemented with domain decomposition");
1048         }
1049     }
1050
1051     if (PAR(cr))
1052     {
1053         /* After possible communicator splitting in make_dd_communicators.
1054          * we can set up the intra/inter node communication.
1055          */
1056         gmx_setup_nodecomm(fplog, cr);
1057     }
1058
1059 #if GMX_MPI
1060     if (isMultiSim(ms))
1061     {
1062         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1063                 "This is simulation %d out of %d running as a composite GROMACS\n"
1064                 "multi-simulation job. Setup for this simulation:\n",
1065                 ms->sim, ms->nsim);
1066     }
1067     GMX_LOG(mdlog.warning).appendTextFormatted(
1068             "Using %d MPI %s\n",
1069             cr->nnodes,
1070 #if GMX_THREAD_MPI
1071             cr->nnodes == 1 ? "thread" : "threads"
1072 #else
1073             cr->nnodes == 1 ? "process" : "processes"
1074 #endif
1075             );
1076     fflush(stderr);
1077 #endif
1078
1079     // If mdrun -pin auto honors any affinity setting that already
1080     // exists. If so, it is nice to provide feedback about whether
1081     // that existing affinity setting was from OpenMP or something
1082     // else, so we run this code both before and after we initialize
1083     // the OpenMP support.
1084     gmx_check_thread_affinity_set(mdlog,
1085                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
1086     /* Check and update the number of OpenMP threads requested */
1087     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1088                                             pmeRunMode, mtop, *inputrec);
1089
1090     gmx_omp_nthreads_init(mdlog, cr,
1091                           hwinfo->nthreads_hw_avail,
1092                           physicalNodeComm.size_,
1093                           hw_opt.nthreads_omp,
1094                           hw_opt.nthreads_omp_pme,
1095                           !thisRankHasDuty(cr, DUTY_PP));
1096
1097     // Enable FP exception detection, but not in
1098     // Release mode and not for compilers with known buggy FP
1099     // exception support (clang with any optimization) or suspected
1100     // buggy FP exception support (gcc 7.* with optimization).
1101 #if !defined NDEBUG && \
1102     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1103     && defined __OPTIMIZE__)
1104     const bool bEnableFPE = true;
1105 #else
1106     const bool bEnableFPE = false;
1107 #endif
1108     //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1109     if (bEnableFPE)
1110     {
1111         gmx_feenableexcept();
1112     }
1113
1114     // Build a data structure that expresses which kinds of non-bonded
1115     // task are handled by this rank.
1116     //
1117     // TODO Later, this might become a loop over all registered modules
1118     // relevant to the mdp inputs, to find those that have such tasks.
1119     //
1120     // TODO This could move before init_domain_decomposition() as part
1121     // of refactoring that separates the responsibility for duty
1122     // assignment from setup for communication between tasks, and
1123     // setup for tasks handled with a domain (ie including short-ranged
1124     // tasks, bonded tasks, etc.).
1125     //
1126     // Note that in general useGpuForNonbonded, etc. can have a value
1127     // that is inconsistent with the presence of actual GPUs on any
1128     // rank, and that is not known to be a problem until the
1129     // duty of the ranks on a node become known.
1130     //
1131     // TODO Later we might need the concept of computeTasksOnThisRank,
1132     // from which we construct gpuTasksOnThisRank.
1133     //
1134     // Currently the DD code assigns duty to ranks that can
1135     // include PP work that currently can be executed on a single
1136     // GPU, if present and compatible.  This has to be coordinated
1137     // across PP ranks on a node, with possible multiple devices
1138     // or sharing devices on a node, either from the user
1139     // selection, or automatically.
1140     auto                 haveGpus = !gpuIdsToUse.empty();
1141     std::vector<GpuTask> gpuTasksOnThisRank;
1142     if (thisRankHasDuty(cr, DUTY_PP))
1143     {
1144         if (useGpuForNonbonded)
1145         {
1146             // Note that any bonded tasks on a GPU always accompany a
1147             // non-bonded task.
1148             if (haveGpus)
1149             {
1150                 gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
1151             }
1152             else if (nonbondedTarget == TaskTarget::Gpu)
1153             {
1154                 gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because no GPU is detected.");
1155             }
1156             else if (bondedTarget == TaskTarget::Gpu)
1157             {
1158                 gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because no GPU is detected.");
1159             }
1160         }
1161     }
1162     // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
1163     if (EEL_PME(inputrec->coulombtype) && (thisRankHasDuty(cr, DUTY_PME)))
1164     {
1165         if (useGpuForPme)
1166         {
1167             if (haveGpus)
1168             {
1169                 gpuTasksOnThisRank.push_back(GpuTask::Pme);
1170             }
1171             else if (pmeTarget == TaskTarget::Gpu)
1172             {
1173                 gmx_fatal(FARGS, "Cannot run PME on a GPU because no GPU is detected.");
1174             }
1175         }
1176     }
1177
1178     GpuTaskAssignment gpuTaskAssignment;
1179     try
1180     {
1181         // Produce the task assignment for this rank.
1182         gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
1183                                               mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank,
1184                                               useGpuForBonded, pmeRunMode);
1185     }
1186     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1187
1188     /* Prevent other ranks from continuing after an issue was found
1189      * and reported as a fatal error.
1190      *
1191      * TODO This function implements a barrier so that MPI runtimes
1192      * can organize an orderly shutdown if one of the ranks has had to
1193      * issue a fatal error in various code already run. When we have
1194      * MPI-aware error handling and reporting, this should be
1195      * improved. */
1196 #if GMX_MPI
1197     if (PAR(cr))
1198     {
1199         MPI_Barrier(cr->mpi_comm_mysim);
1200     }
1201     if (isMultiSim(ms))
1202     {
1203         if (SIMMASTER(cr))
1204         {
1205             MPI_Barrier(ms->mpi_comm_masters);
1206         }
1207         /* We need another barrier to prevent non-master ranks from contiuing
1208          * when an error occured in a different simulation.
1209          */
1210         MPI_Barrier(cr->mpi_comm_mysim);
1211     }
1212 #endif
1213
1214     /* Now that we know the setup is consistent, check for efficiency */
1215     check_resource_division_efficiency(hwinfo, !gpuTaskAssignment.empty(), mdrunOptions.ntompOptionIsSet,
1216                                        cr, mdlog);
1217
1218     gmx_device_info_t *nonbondedDeviceInfo = nullptr;
1219
1220     if (thisRankHasDuty(cr, DUTY_PP))
1221     {
1222         // This works because only one task of each type is currently permitted.
1223         auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
1224                                              hasTaskType<GpuTask::Nonbonded>);
1225         if (nbGpuTaskMapping != gpuTaskAssignment.end())
1226         {
1227             int nonbondedDeviceId = nbGpuTaskMapping->deviceId_;
1228             nonbondedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, nonbondedDeviceId);
1229             init_gpu(nonbondedDeviceInfo);
1230
1231             if (DOMAINDECOMP(cr))
1232             {
1233                 /* When we share GPUs over ranks, we need to know this for the DLB */
1234                 dd_setup_dlb_resource_sharing(cr, nonbondedDeviceId);
1235             }
1236
1237         }
1238     }
1239
1240     gmx_device_info_t                *pmeDeviceInfo = nullptr;
1241     // Later, this program could contain kernels that might be later
1242     // re-used as auto-tuning progresses, or subsequent simulations
1243     // are invoked.
1244     PmeGpuProgramStorage pmeGpuProgram;
1245     // This works because only one task of each type is currently permitted.
1246     auto                 pmeGpuTaskMapping     = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasTaskType<GpuTask::Pme>);
1247     const bool           thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
1248     if (thisRankHasPmeGpuTask)
1249     {
1250         pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_);
1251         init_gpu(pmeDeviceInfo);
1252         pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
1253     }
1254
1255     /* getting number of PP/PME threads on this MPI / tMPI rank.
1256        PME: env variable should be read only on one node to make sure it is
1257        identical everywhere;
1258      */
1259     const int numThreadsOnThisRank =
1260         thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) : gmx_omp_nthreads_get(emntPME);
1261     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
1262                                   *hwinfo->hardwareTopology,
1263                                   physicalNodeComm, mdlog);
1264
1265     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1266     {
1267         /* Before setting affinity, check whether the affinity has changed
1268          * - which indicates that probably the OpenMP library has changed it
1269          * since we first checked).
1270          */
1271         gmx_check_thread_affinity_set(mdlog,
1272                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1273
1274         int numThreadsOnThisNode, intraNodeThreadOffset;
1275         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1276                                  &intraNodeThreadOffset);
1277
1278         /* Set the CPU affinity */
1279         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
1280                                 numThreadsOnThisRank, numThreadsOnThisNode,
1281                                 intraNodeThreadOffset, nullptr);
1282     }
1283
1284     if (mdrunOptions.timingOptions.resetStep > -1)
1285     {
1286         GMX_LOG(mdlog.info).asParagraph().
1287             appendText("The -resetstep functionality is deprecated, and may be removed in a future version.");
1288     }
1289     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1290
1291     if (PAR(cr))
1292     {
1293         /* Master synchronizes its value of reset_counters with all nodes
1294          * including PME only nodes */
1295         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1296         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
1297         wcycle_set_reset_counters(wcycle, reset_counters);
1298     }
1299
1300     // Membrane embedding must be initialized before we call init_forcerec()
1301     if (doMembed)
1302     {
1303         if (MASTER(cr))
1304         {
1305             fprintf(stderr, "Initializing membed");
1306         }
1307         /* Note that membed cannot work in parallel because mtop is
1308          * changed here. Fix this if we ever want to make it run with
1309          * multiple ranks. */
1310         membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr,
1311                              &mdrunOptions
1312                                  .checkpointOptions.period);
1313     }
1314
1315     std::unique_ptr<MDAtoms>     mdAtoms;
1316     std::unique_ptr<gmx_vsite_t> vsite;
1317
1318     t_nrnb nrnb;
1319     if (thisRankHasDuty(cr, DUTY_PP))
1320     {
1321         mdModulesNotifier.notify(*cr);
1322         mdModulesNotifier.notify(&atomSets);
1323         mdModulesNotifier.notify(PeriodicBoundaryConditionType {inputrec->ePBC});
1324         /* Initiate forcerecord */
1325         fr                 = new t_forcerec;
1326         fr->forceProviders = mdModules_->initForceProviders();
1327         init_forcerec(fplog, mdlog, fr, fcd,
1328                       inputrec, &mtop, cr, box,
1329                       opt2fn("-table", filenames.size(), filenames.data()),
1330                       opt2fn("-tablep", filenames.size(), filenames.data()),
1331                       opt2fns("-tableb", filenames.size(), filenames.data()),
1332                       *hwinfo, nonbondedDeviceInfo,
1333                       useGpuForBonded,
1334                       FALSE,
1335                       pforce,
1336                       wcycle);
1337
1338         // TODO Move this to happen during domain decomposition setup,
1339         // once stream and event handling works well with that.
1340         if (havePPDomainDecomposition(cr) && c_enableGpuHaloExchange && useGpuForNonbonded)
1341         {
1342             void *stream                   = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1343             void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
1344             cr->dd->gpuHaloExchange = std::make_unique<GpuHaloExchange>(cr->dd, cr->mpi_comm_mysim, stream, coordinatesOnDeviceEvent);
1345         }
1346
1347         /* Initialize the mdAtoms structure.
1348          * mdAtoms is not filled with atom data,
1349          * as this can not be done now with domain decomposition.
1350          */
1351         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1352         if (globalState && thisRankHasPmeGpuTask)
1353         {
1354             // The pinning of coordinates in the global state object works, because we only use
1355             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1356             // points to the global state object without DD.
1357             // FIXME: MD and EM separately set up the local state - this should happen in the same function,
1358             // which should also perform the pinning.
1359             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1360         }
1361
1362         /* Initialize the virtual site communication */
1363         vsite = initVsite(mtop, cr);
1364
1365         calc_shifts(box, fr->shift_vec);
1366
1367         /* With periodic molecules the charge groups should be whole at start up
1368          * and the virtual sites should not be far from their proper positions.
1369          */
1370         if (!inputrec->bContinuation && MASTER(cr) &&
1371             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
1372         {
1373             /* Make molecules whole at start of run */
1374             if (fr->ePBC != epbcNONE)
1375             {
1376                 do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
1377             }
1378             if (vsite)
1379             {
1380                 /* Correct initial vsite positions are required
1381                  * for the initial distribution in the domain decomposition
1382                  * and for the initial shell prediction.
1383                  */
1384                 constructVsitesGlobal(mtop, globalState->x);
1385             }
1386         }
1387
1388         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1389         {
1390             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1391             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1392         }
1393     }
1394     else
1395     {
1396         /* This is a PME only node */
1397
1398         GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized");
1399
1400         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1401         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1402     }
1403
1404     gmx_pme_t *sepPmeData = nullptr;
1405     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1406     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
1407     gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
1408
1409     /* Initiate PME if necessary,
1410      * either on all nodes or on dedicated PME nodes only. */
1411     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1412     {
1413         if (mdAtoms && mdAtoms->mdatoms())
1414         {
1415             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1416             if (EVDW_PME(inputrec->vdwtype))
1417             {
1418                 nTypePerturbed   = mdAtoms->mdatoms()->nTypePerturbed;
1419             }
1420         }
1421         if (cr->npmenodes > 0)
1422         {
1423             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1424             gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
1425             gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
1426         }
1427
1428         if (thisRankHasDuty(cr, DUTY_PME))
1429         {
1430             try
1431             {
1432                 pmedata = gmx_pme_init(cr,
1433                                        getNumPmeDomains(cr->dd),
1434                                        inputrec,
1435                                        nChargePerturbed != 0, nTypePerturbed != 0,
1436                                        mdrunOptions.reproducible,
1437                                        ewaldcoeff_q, ewaldcoeff_lj,
1438                                        gmx_omp_nthreads_get(emntPME),
1439                                        pmeRunMode, nullptr,
1440                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
1441             }
1442             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1443         }
1444     }
1445
1446
1447     if (EI_DYNAMICS(inputrec->eI))
1448     {
1449         /* Turn on signal handling on all nodes */
1450         /*
1451          * (A user signal from the PME nodes (if any)
1452          * is communicated to the PP nodes.
1453          */
1454         signal_handler_install();
1455     }
1456
1457     pull_t *pull_work = nullptr;
1458     if (thisRankHasDuty(cr, DUTY_PP))
1459     {
1460         /* Assumes uniform use of the number of OpenMP threads */
1461         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1462
1463         if (inputrec->bPull)
1464         {
1465             /* Initialize pull code */
1466             pull_work =
1467                 init_pull(fplog, inputrec->pull, inputrec,
1468                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
1469             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1470             {
1471                 initPullHistory(pull_work, &observablesHistory);
1472             }
1473             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1474             {
1475                 init_pull_output_files(pull_work,
1476                                        filenames.size(), filenames.data(), oenv,
1477                                        startingBehavior);
1478             }
1479         }
1480
1481         std::unique_ptr<EnforcedRotation> enforcedRotation;
1482         if (inputrec->bRot)
1483         {
1484             /* Initialize enforced rotation code */
1485             enforcedRotation = init_rot(fplog,
1486                                         inputrec,
1487                                         filenames.size(),
1488                                         filenames.data(),
1489                                         cr,
1490                                         &atomSets,
1491                                         globalState.get(),
1492                                         &mtop,
1493                                         oenv,
1494                                         mdrunOptions,
1495                                         startingBehavior);
1496         }
1497
1498         t_swap *swap = nullptr;
1499         if (inputrec->eSwapCoords != eswapNO)
1500         {
1501             /* Initialize ion swapping code */
1502             swap = init_swapcoords(fplog, inputrec,
1503                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1504                                    &mtop, globalState.get(), &observablesHistory,
1505                                    cr, &atomSets, oenv, mdrunOptions,
1506                                    startingBehavior);
1507         }
1508
1509         /* Let makeConstraints know whether we have essential dynamics constraints.
1510          * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint
1511          */
1512         bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr
1513                                     || observablesHistory.edsamHistory);
1514         auto constr              = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics,
1515                                                    fplog, *mdAtoms->mdatoms(),
1516                                                    cr, ms, &nrnb, wcycle, fr->bMolPBC);
1517
1518         /* Energy terms and groups */
1519         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), inputrec->fepvals->n_lambda);
1520
1521         /* Kinetic energy data */
1522         gmx_ekindata_t ekind;
1523         init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
1524
1525         /* Set up interactive MD (IMD) */
1526         auto imdSession = makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
1527                                          MASTER(cr) ? globalState->x.rvec_array() : nullptr,
1528                                          filenames.size(), filenames.data(), oenv, mdrunOptions.imdOptions,
1529                                          startingBehavior);
1530
1531         if (DOMAINDECOMP(cr))
1532         {
1533             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1534             /* This call is not included in init_domain_decomposition mainly
1535              * because fr->cginfo_mb is set later.
1536              */
1537             dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
1538                             domdecOptions.checkBondedInteractions,
1539                             fr->cginfo_mb);
1540         }
1541
1542         // TODO This is not the right place to manage the lifetime of
1543         // this data structure, but currently it's the easiest way to
1544         // make it work.
1545         MdScheduleWorkload mdScheduleWork;
1546
1547         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1548         SimulatorBuilder simulatorBuilder;
1549
1550         // build and run simulator object based on user-input
1551         const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
1552                     false,
1553                     inputrec, doRerun, vsite.get(), ms, replExParams,
1554                     fcd, static_cast<int>(filenames.size()), filenames.data(),
1555                     &observablesHistory, membed);
1556         auto simulator = simulatorBuilder.build(
1557                     inputIsCompatibleWithModularSimulator,
1558                     fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(),
1559                     oenv,
1560                     mdrunOptions,
1561                     startingBehavior,
1562                     vsite.get(), constr.get(),
1563                     enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1564                     deform.get(),
1565                     mdModules_->outputProvider(),
1566                     mdModules_->notifier(),
1567                     inputrec, imdSession.get(), pull_work, swap, &mtop,
1568                     fcd,
1569                     globalState.get(),
1570                     &observablesHistory,
1571                     mdAtoms.get(), &nrnb, wcycle, fr,
1572                     &enerd,
1573                     &ekind,
1574                     &mdScheduleWork,
1575                     replExParams,
1576                     membed,
1577                     walltime_accounting,
1578                     std::move(stopHandlerBuilder_),
1579                     doRerun);
1580         simulator->run();
1581
1582         if (inputrec->bPull)
1583         {
1584             finish_pull(pull_work);
1585         }
1586         finish_swapcoords(swap);
1587     }
1588     else
1589     {
1590         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1591         /* do PME only */
1592         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1593         gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
1594     }
1595
1596     wallcycle_stop(wcycle, ewcRUN);
1597
1598     /* Finish up, write some stuff
1599      * if rerunMD, don't write last frame again
1600      */
1601     finish_run(fplog, mdlog, cr,
1602                inputrec, &nrnb, wcycle, walltime_accounting,
1603                fr ? fr->nbv.get() : nullptr,
1604                pmedata,
1605                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1606
1607     // clean up cycle counter
1608     wallcycle_destroy(wcycle);
1609
1610 // Free PME data
1611     if (pmedata)
1612     {
1613         gmx_pme_destroy(pmedata);
1614         pmedata = nullptr;
1615     }
1616
1617     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1618     // before we destroy the GPU context(s) in free_gpu_resources().
1619     // Pinned buffers are associated with contexts in CUDA.
1620     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1621     mdAtoms.reset(nullptr);
1622     globalState.reset(nullptr);
1623     mdModules_.reset(nullptr);   // destruct force providers here as they might also use the GPU
1624
1625     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
1626     free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
1627     free_gpu(nonbondedDeviceInfo);
1628     free_gpu(pmeDeviceInfo);
1629     done_forcerec(fr, mtop.molblock.size());
1630     sfree(fcd);
1631
1632     if (doMembed)
1633     {
1634         free_membed(membed);
1635     }
1636
1637     /* Does what it says */
1638     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1639     walltime_accounting_destroy(walltime_accounting);
1640
1641     // Ensure log file content is written
1642     if (logFileHandle)
1643     {
1644         gmx_fio_flush(logFileHandle);
1645     }
1646
1647     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1648      * exceptions were enabled before function was called. */
1649     if (bEnableFPE)
1650     {
1651         gmx_fedisableexcept();
1652     }
1653
1654     auto rc = static_cast<int>(gmx_get_stop_condition());
1655
1656 #if GMX_THREAD_MPI
1657     /* we need to join all threads. The sub-threads join when they
1658        exit this function, but the master thread needs to be told to
1659        wait for that. */
1660     if (PAR(cr) && MASTER(cr))
1661     {
1662         tMPI_Finalize();
1663     }
1664 #endif
1665     return rc;
1666 }
1667
1668 Mdrunner::~Mdrunner()
1669 {
1670     // Clean up of the Manager.
1671     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1672     // but okay as long as threads synchronize some time before adding or accessing
1673     // a new set of restraints.
1674     if (restraintManager_)
1675     {
1676         restraintManager_->clear();
1677         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1678                    "restraints added during runner life time should be cleared at runner destruction.");
1679     }
1680 };
1681
1682 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller,
1683                             const std::string                        &name)
1684 {
1685     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1686     // Not sure if this should be logged through the md logger or something else,
1687     // but it is helpful to have some sort of INFO level message sent somewhere.
1688     //    std::cout << "Registering restraint named " << name << std::endl;
1689
1690     // When multiple restraints are used, it may be wasteful to register them separately.
1691     // Maybe instead register an entire Restraint Manager as a force provider.
1692     restraintManager_->addToSpec(std::move(puller),
1693                                  name);
1694 }
1695
1696 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules)
1697     : mdModules_(std::move(mdModules))
1698 {
1699 }
1700
1701 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
1702
1703 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
1704 Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
1705
1706 class Mdrunner::BuilderImplementation
1707 {
1708     public:
1709         BuilderImplementation() = delete;
1710         BuilderImplementation(std::unique_ptr<MDModules> mdModules);
1711         ~BuilderImplementation();
1712
1713         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
1714                                                     real                forceWarningThreshold,
1715                                                     StartingBehavior    startingBehavior);
1716
1717         void addDomdec(const DomdecOptions &options);
1718
1719         void addVerletList(int nstlist);
1720
1721         void addReplicaExchange(const ReplicaExchangeParameters &params);
1722
1723         void addMultiSim(gmx_multisim_t* multisim);
1724
1725         void addCommunicationRecord(t_commrec *cr);
1726
1727         void addNonBonded(const char* nbpu_opt);
1728
1729         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1730
1731         void addBondedTaskAssignment(const char* bonded_opt);
1732
1733         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
1734
1735         void addFilenames(ArrayRef <const t_filenm> filenames);
1736
1737         void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1738
1739         void addLogFile(t_fileio *logFileHandle);
1740
1741         void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1742
1743         Mdrunner build();
1744
1745     private:
1746
1747         // Default parameters copied from runner.h
1748         // \todo Clarify source(s) of default parameters.
1749
1750         const char* nbpu_opt_    = nullptr;
1751         const char* pme_opt_     = nullptr;
1752         const char* pme_fft_opt_ = nullptr;
1753         const char *bonded_opt_  = nullptr;
1754
1755         MdrunOptions                          mdrunOptions_;
1756
1757         DomdecOptions                         domdecOptions_;
1758
1759         ReplicaExchangeParameters             replicaExchangeParameters_;
1760
1761         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1762         int         nstlist_ = 0;
1763
1764         //! Multisim communicator handle.
1765         std::unique_ptr<gmx_multisim_t*>      multisim_ = nullptr;
1766
1767         //! Non-owning communication record (only used when building command-line mdrun)
1768         t_commrec *communicationRecord_ = nullptr;
1769
1770         //! Print a warning if any force is larger than this (in kJ/mol nm).
1771         real forceWarningThreshold_ = -1;
1772
1773         //! Whether the simulation will start afresh, or restart with/without appending.
1774         StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1775
1776         //! The modules that comprise the functionality of mdrun.
1777         std::unique_ptr<MDModules> mdModules_;
1778
1779         //! \brief Parallelism information.
1780         gmx_hw_opt_t hardwareOptions_;
1781
1782         //! filename options for simulation.
1783         ArrayRef<const t_filenm> filenames_;
1784
1785         /*! \brief Handle to output environment.
1786          *
1787          * \todo gmx_output_env_t needs lifetime management.
1788          */
1789         gmx_output_env_t*    outputEnvironment_ = nullptr;
1790
1791         /*! \brief Non-owning handle to MD log file.
1792          *
1793          * \todo Context should own output facilities for client.
1794          * \todo Improve log file handle management.
1795          * \internal
1796          * Code managing the FILE* relies on the ability to set it to
1797          * nullptr to check whether the filehandle is valid.
1798          */
1799         t_fileio* logFileHandle_ = nullptr;
1800
1801         /*!
1802          * \brief Builder for simulation stop signal handler.
1803          */
1804         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1805 };
1806
1807 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules) :
1808     mdModules_(std::move(mdModules))
1809 {
1810 }
1811
1812 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
1813
1814 Mdrunner::BuilderImplementation &
1815 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions    &options,
1816                                                       const real             forceWarningThreshold,
1817                                                       const StartingBehavior startingBehavior)
1818 {
1819     mdrunOptions_          = options;
1820     forceWarningThreshold_ = forceWarningThreshold;
1821     startingBehavior_      = startingBehavior;
1822     return *this;
1823 }
1824
1825 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options)
1826 {
1827     domdecOptions_ = options;
1828 }
1829
1830 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
1831 {
1832     nstlist_ = nstlist;
1833 }
1834
1835 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters &params)
1836 {
1837     replicaExchangeParameters_ = params;
1838 }
1839
1840 void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim)
1841 {
1842     multisim_ = std::make_unique<gmx_multisim_t*>(multisim);
1843 }
1844
1845 void Mdrunner::BuilderImplementation::addCommunicationRecord(t_commrec *cr)
1846 {
1847     communicationRecord_ = cr;
1848 }
1849
1850 Mdrunner Mdrunner::BuilderImplementation::build()
1851 {
1852     auto newRunner = Mdrunner(std::move(mdModules_));
1853
1854     newRunner.mdrunOptions          = mdrunOptions_;
1855     newRunner.pforce                = forceWarningThreshold_;
1856     newRunner.startingBehavior      = startingBehavior_;
1857     newRunner.domdecOptions         = domdecOptions_;
1858
1859     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
1860     newRunner.hw_opt          = hardwareOptions_;
1861
1862     // No invariant to check. This parameter exists to optionally override other behavior.
1863     newRunner.nstlist_cmdline = nstlist_;
1864
1865     newRunner.replExParams    = replicaExchangeParameters_;
1866
1867     newRunner.filenames = filenames_;
1868
1869     GMX_ASSERT(communicationRecord_, "Bug found. It should not be possible to call build() without a valid communicationRecord_.");
1870     newRunner.cr = communicationRecord_;
1871
1872     if (multisim_)
1873     {
1874         // nullptr is a valid value for the multisim handle, so we don't check the pointed-to pointer.
1875         newRunner.ms = *multisim_;
1876     }
1877     else
1878     {
1879         GMX_THROW(gmx::APIError("MdrunnerBuilder::addMultiSim() is required before build()"));
1880     }
1881
1882     // \todo Clarify ownership and lifetime management for gmx_output_env_t
1883     // \todo Update sanity checking when output environment has clearly specified invariants.
1884     // Initialization and default values for oenv are not well specified in the current version.
1885     if (outputEnvironment_)
1886     {
1887         newRunner.oenv  = outputEnvironment_;
1888     }
1889     else
1890     {
1891         GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()"));
1892     }
1893
1894     newRunner.logFileHandle = logFileHandle_;
1895
1896     if (nbpu_opt_)
1897     {
1898         newRunner.nbpu_opt    = nbpu_opt_;
1899     }
1900     else
1901     {
1902         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
1903     }
1904
1905     if (pme_opt_ && pme_fft_opt_)
1906     {
1907         newRunner.pme_opt     = pme_opt_;
1908         newRunner.pme_fft_opt = pme_fft_opt_;
1909     }
1910     else
1911     {
1912         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
1913     }
1914
1915     if (bonded_opt_)
1916     {
1917         newRunner.bonded_opt = bonded_opt_;
1918     }
1919     else
1920     {
1921         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
1922     }
1923
1924     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
1925
1926     if (stopHandlerBuilder_)
1927     {
1928         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
1929     }
1930     else
1931     {
1932         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
1933     }
1934
1935     return newRunner;
1936 }
1937
1938 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
1939 {
1940     nbpu_opt_ = nbpu_opt;
1941 }
1942
1943 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
1944                                              const char* pme_fft_opt)
1945 {
1946     pme_opt_     = pme_opt;
1947     pme_fft_opt_ = pme_fft_opt;
1948 }
1949
1950 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
1951 {
1952     bonded_opt_ = bonded_opt;
1953 }
1954
1955 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1956 {
1957     hardwareOptions_ = hardwareOptions;
1958 }
1959
1960 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
1961 {
1962     filenames_ = filenames;
1963 }
1964
1965 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1966 {
1967     outputEnvironment_ = outputEnvironment;
1968 }
1969
1970 void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle)
1971 {
1972     logFileHandle_ = logFileHandle;
1973 }
1974
1975 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
1976 {
1977     stopHandlerBuilder_ = std::move(builder);
1978 }
1979
1980 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules) :
1981     impl_ {std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules))}
1982 {
1983 }
1984
1985 MdrunnerBuilder::~MdrunnerBuilder() = default;
1986
1987 MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions    &options,
1988                                                       real                   forceWarningThreshold,
1989                                                       const StartingBehavior startingBehavior)
1990 {
1991     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
1992     return *this;
1993 }
1994
1995 MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options)
1996 {
1997     impl_->addDomdec(options);
1998     return *this;
1999 }
2000
2001 MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist)
2002 {
2003     impl_->addVerletList(nstlist);
2004     return *this;
2005 }
2006
2007 MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters &params)
2008 {
2009     impl_->addReplicaExchange(params);
2010     return *this;
2011 }
2012
2013 MdrunnerBuilder &MdrunnerBuilder::addMultiSim(gmx_multisim_t* multisim)
2014 {
2015     impl_->addMultiSim(multisim);
2016     return *this;
2017 }
2018
2019 MdrunnerBuilder &MdrunnerBuilder::addCommunicationRecord(t_commrec *cr)
2020 {
2021     impl_->addCommunicationRecord(cr);
2022     return *this;
2023 }
2024
2025 MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
2026 {
2027     impl_->addNonBonded(nbpu_opt);
2028     return *this;
2029 }
2030
2031 MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
2032                                                     const char* pme_fft_opt)
2033 {
2034     // The builder method may become more general in the future, but in this version,
2035     // parameters for PME electrostatics are both required and the only parameters
2036     // available.
2037     if (pme_opt && pme_fft_opt)
2038     {
2039         impl_->addPME(pme_opt, pme_fft_opt);
2040     }
2041     else
2042     {
2043         GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
2044     }
2045     return *this;
2046 }
2047
2048 MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
2049 {
2050     impl_->addBondedTaskAssignment(bonded_opt);
2051     return *this;
2052 }
2053
2054 Mdrunner MdrunnerBuilder::build()
2055 {
2056     return impl_->build();
2057 }
2058
2059 MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
2060 {
2061     impl_->addHardwareOptions(hardwareOptions);
2062     return *this;
2063 }
2064
2065 MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2066 {
2067     impl_->addFilenames(filenames);
2068     return *this;
2069 }
2070
2071 MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2072 {
2073     impl_->addOutputEnvironment(outputEnvironment);
2074     return *this;
2075 }
2076
2077 MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle)
2078 {
2079     impl_->addLogFile(logFileHandle);
2080     return *this;
2081 }
2082
2083 MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2084 {
2085     impl_->addStopHandlerBuilder(std::move(builder));
2086     return *this;
2087 }
2088
2089 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default;
2090
2091 MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default;
2092
2093 } // namespace gmx