src/gromacs/mdrun/runner.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the MD runner routine calling all integrators.
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include "runner.h"
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cinttypes>
  52 #include <csignal>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57 #include <memory>
  58
  59 #include "gromacs/commandline/filenm.h"
  60 #include "gromacs/domdec/builder.h"
  61 #include "gromacs/domdec/domdec.h"
  62 #include "gromacs/domdec/domdec_struct.h"
  63 #include "gromacs/domdec/gpuhaloexchange.h"
  64 #include "gromacs/domdec/localatomsetmanager.h"
  65 #include "gromacs/domdec/partition.h"
  66 #include "gromacs/ewald/ewald_utils.h"
  67 #include "gromacs/ewald/pme_gpu_program.h"
  68 #include "gromacs/ewald/pme_only.h"
  69 #include "gromacs/ewald/pme_pp_comm_gpu.h"
  70 #include "gromacs/fileio/checkpoint.h"
  71 #include "gromacs/fileio/gmxfio.h"
  72 #include "gromacs/fileio/oenv.h"
  73 #include "gromacs/fileio/tpxio.h"
  74 #include "gromacs/gmxlib/network.h"
  75 #include "gromacs/gmxlib/nrnb.h"
  76 #include "gromacs/gpu_utils/device_stream_manager.h"
  77 #include "gromacs/hardware/cpuinfo.h"
  78 #include "gromacs/hardware/detecthardware.h"
  79 #include "gromacs/hardware/device_management.h"
  80 #include "gromacs/hardware/printhardware.h"
  81 #include "gromacs/imd/imd.h"
  82 #include "gromacs/listed_forces/disre.h"
  83 #include "gromacs/listed_forces/gpubonded.h"
  84 #include "gromacs/listed_forces/listed_forces.h"
  85 #include "gromacs/listed_forces/orires.h"
  86 #include "gromacs/math/functions.h"
  87 #include "gromacs/math/utilities.h"
  88 #include "gromacs/math/vec.h"
  89 #include "gromacs/mdlib/boxdeformation.h"
  90 #include "gromacs/mdlib/broadcaststructs.h"
  91 #include "gromacs/mdlib/calc_verletbuf.h"
  92 #include "gromacs/mdlib/dispersioncorrection.h"
  93 #include "gromacs/mdlib/enerdata_utils.h"
  94 #include "gromacs/mdlib/force.h"
  95 #include "gromacs/mdlib/forcerec.h"
  96 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  97 #include "gromacs/mdlib/gpuforcereduction.h"
  98 #include "gromacs/mdlib/makeconstraints.h"
  99 #include "gromacs/mdlib/md_support.h"
 100 #include "gromacs/mdlib/mdatoms.h"
 101 #include "gromacs/mdlib/sighandler.h"
 102 #include "gromacs/mdlib/stophandler.h"
 103 #include "gromacs/mdlib/tgroup.h"
 104 #include "gromacs/mdlib/updategroups.h"
 105 #include "gromacs/mdlib/vsite.h"
 106 #include "gromacs/mdrun/mdmodules.h"
 107 #include "gromacs/mdrun/simulationcontext.h"
 108 #include "gromacs/mdrun/simulationinput.h"
 109 #include "gromacs/mdrun/simulationinputhandle.h"
 110 #include "gromacs/mdrunutility/handlerestart.h"
 111 #include "gromacs/mdrunutility/logging.h"
 112 #include "gromacs/mdrunutility/multisim.h"
 113 #include "gromacs/mdrunutility/printtime.h"
 114 #include "gromacs/mdrunutility/threadaffinity.h"
 115 #include "gromacs/mdtypes/checkpointdata.h"
 116 #include "gromacs/mdtypes/commrec.h"
 117 #include "gromacs/mdtypes/enerdata.h"
 118 #include "gromacs/mdtypes/fcdata.h"
 119 #include "gromacs/mdtypes/forcerec.h"
 120 #include "gromacs/mdtypes/group.h"
 121 #include "gromacs/mdtypes/inputrec.h"
 122 #include "gromacs/mdtypes/interaction_const.h"
 123 #include "gromacs/mdtypes/md_enums.h"
 124 #include "gromacs/mdtypes/mdatom.h"
 125 #include "gromacs/mdtypes/mdrunoptions.h"
 126 #include "gromacs/mdtypes/observableshistory.h"
 127 #include "gromacs/mdtypes/simulation_workload.h"
 128 #include "gromacs/mdtypes/state.h"
 129 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
 130 #include "gromacs/modularsimulator/modularsimulator.h"
 131 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 132 #include "gromacs/nbnxm/nbnxm.h"
 133 #include "gromacs/nbnxm/pairlist_tuning.h"
 134 #include "gromacs/pbcutil/pbc.h"
 135 #include "gromacs/pulling/output.h"
 136 #include "gromacs/pulling/pull.h"
 137 #include "gromacs/pulling/pull_rotation.h"
 138 #include "gromacs/restraint/manager.h"
 139 #include "gromacs/restraint/restraintmdmodule.h"
 140 #include "gromacs/restraint/restraintpotential.h"
 141 #include "gromacs/swap/swapcoords.h"
 142 #include "gromacs/taskassignment/decidegpuusage.h"
 143 #include "gromacs/taskassignment/decidesimulationworkload.h"
 144 #include "gromacs/taskassignment/resourcedivision.h"
 145 #include "gromacs/taskassignment/taskassignment.h"
 146 #include "gromacs/taskassignment/usergpuids.h"
 147 #include "gromacs/timing/gpu_timing.h"
 148 #include "gromacs/timing/wallcycle.h"
 149 #include "gromacs/timing/wallcyclereporting.h"
 150 #include "gromacs/topology/mtop_util.h"
 151 #include "gromacs/trajectory/trajectoryframe.h"
 152 #include "gromacs/utility/basenetwork.h"
 153 #include "gromacs/utility/cstringutil.h"
 154 #include "gromacs/utility/exceptions.h"
 155 #include "gromacs/utility/fatalerror.h"
 156 #include "gromacs/utility/filestream.h"
 157 #include "gromacs/utility/gmxassert.h"
 158 #include "gromacs/utility/gmxmpi.h"
 159 #include "gromacs/utility/keyvaluetree.h"
 160 #include "gromacs/utility/logger.h"
 161 #include "gromacs/utility/loggerbuilder.h"
 162 #include "gromacs/utility/mdmodulenotification.h"
 163 #include "gromacs/utility/physicalnodecommunicator.h"
 164 #include "gromacs/utility/pleasecite.h"
 165 #include "gromacs/utility/programcontext.h"
 166 #include "gromacs/utility/smalloc.h"
 167 #include "gromacs/utility/stringutil.h"
 168
 169 #include "isimulator.h"
 170 #include "membedholder.h"
 171 #include "replicaexchange.h"
 172 #include "simulatorbuilder.h"
 173
 174 #if GMX_FAHCORE
 175 #    include "corewrap.h"
 176 #endif
 177
 178 namespace gmx
 179 {
 180
 181
 182 /*! \brief Manage any development feature flag variables encountered
 183  *
 184  * The use of dev features indicated by environment variables is
 185  * logged in order to ensure that runs with such features enabled can
 186  * be identified from their log and standard output. Any cross
 187  * dependencies are also checked, and if unsatisfied, a fatal error
 188  * issued.
 189  *
 190  * Note that some development features overrides are applied already here:
 191  * the GPU communication flags are set to false in non-tMPI and non-CUDA builds.
 192  *
 193  * \param[in]  mdlog                Logger object.
 194  * \param[in]  useGpuForNonbonded   True if the nonbonded task is offloaded in this run.
 195  * \param[in]  pmeRunMode           The PME run mode for this run
 196  * \returns                         The object populated with development feature flags.
 197  */
 198 static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& mdlog,
 199                                                          const bool           useGpuForNonbonded,
 200                                                          const PmeRunMode     pmeRunMode)
 201 {
 202     DevelopmentFeatureFlags devFlags;
 203
 204     // Some builds of GCC 5 give false positive warnings that these
 205     // getenv results are ignored when clearly they are used.
 206 #pragma GCC diagnostic push
 207 #pragma GCC diagnostic ignored "-Wunused-result"
 208
 209     devFlags.enableGpuBufferOps =
 210             GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 211     devFlags.enableGpuHaloExchange = GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_DD_COMMS") != nullptr;
 212     devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr);
 213     devFlags.enableGpuPmePPComm =
 214             GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_PME_PP_COMMS") != nullptr;
 215
 216 #pragma GCC diagnostic pop
 217
 218     if (devFlags.enableGpuBufferOps)
 219     {
 220         GMX_LOG(mdlog.warning)
 221                 .asParagraph()
 222                 .appendTextFormatted(
 223                         "This run uses the 'GPU buffer ops' feature, enabled by the "
 224                         "GMX_USE_GPU_BUFFER_OPS environment variable.");
 225     }
 226
 227     if (devFlags.forceGpuUpdateDefault)
 228     {
 229         GMX_LOG(mdlog.warning)
 230                 .asParagraph()
 231                 .appendTextFormatted(
 232                         "This run will default to '-update gpu' as requested by the "
 233                         "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable. GPU update with domain "
 234                         "decomposition lacks substantial testing and should be used with caution.");
 235     }
 236
 237     if (devFlags.enableGpuHaloExchange)
 238     {
 239         if (useGpuForNonbonded)
 240         {
 241             if (!devFlags.enableGpuBufferOps)
 242             {
 243                 GMX_LOG(mdlog.warning)
 244                         .asParagraph()
 245                         .appendTextFormatted(
 246                                 "Enabling GPU buffer operations required by GMX_GPU_DD_COMMS "
 247                                 "(equivalent with GMX_USE_GPU_BUFFER_OPS=1).");
 248                 devFlags.enableGpuBufferOps = true;
 249             }
 250             GMX_LOG(mdlog.warning)
 251                     .asParagraph()
 252                     .appendTextFormatted(
 253                             "This run has requested the 'GPU halo exchange' feature, enabled by "
 254                             "the "
 255                             "GMX_GPU_DD_COMMS environment variable.");
 256         }
 257         else
 258         {
 259             GMX_LOG(mdlog.warning)
 260                     .asParagraph()
 261                     .appendTextFormatted(
 262                             "GMX_GPU_DD_COMMS environment variable detected, but the 'GPU "
 263                             "halo exchange' feature will not be enabled as nonbonded interactions "
 264                             "are not offloaded.");
 265             devFlags.enableGpuHaloExchange = false;
 266         }
 267     }
 268
 269     if (devFlags.enableGpuPmePPComm)
 270     {
 271         if (pmeRunMode == PmeRunMode::GPU)
 272         {
 273             if (!devFlags.enableGpuBufferOps)
 274             {
 275                 GMX_LOG(mdlog.warning)
 276                         .asParagraph()
 277                         .appendTextFormatted(
 278                                 "Enabling GPU buffer operations required by GMX_GPU_PME_PP_COMMS "
 279                                 "(equivalent with GMX_USE_GPU_BUFFER_OPS=1).");
 280                 devFlags.enableGpuBufferOps = true;
 281             }
 282             GMX_LOG(mdlog.warning)
 283                     .asParagraph()
 284                     .appendTextFormatted(
 285                             "This run uses the 'GPU PME-PP communications' feature, enabled "
 286                             "by the GMX_GPU_PME_PP_COMMS environment variable.");
 287         }
 288         else
 289         {
 290             std::string clarification;
 291             if (pmeRunMode == PmeRunMode::Mixed)
 292             {
 293                 clarification =
 294                         "PME FFT and gather are not offloaded to the GPU (PME is running in mixed "
 295                         "mode).";
 296             }
 297             else
 298             {
 299                 clarification = "PME is not offloaded to the GPU.";
 300             }
 301             GMX_LOG(mdlog.warning)
 302                     .asParagraph()
 303                     .appendText(
 304                             "GMX_GPU_PME_PP_COMMS environment variable detected, but the "
 305                             "'GPU PME-PP communications' feature was not enabled as "
 306                             + clarification);
 307             devFlags.enableGpuPmePPComm = false;
 308         }
 309     }
 310
 311     return devFlags;
 312 }
 313
 314 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
 315  *
 316  * Used to ensure that the master thread does not modify mdrunner during copy
 317  * on the spawned threads. */
 318 static void threadMpiMdrunnerAccessBarrier()
 319 {
 320 #if GMX_THREAD_MPI
 321     MPI_Barrier(MPI_COMM_WORLD);
 322 #endif
 323 }
 324
 325 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 326 {
 327     auto newRunner = Mdrunner(std::make_unique<MDModules>());
 328
 329     // All runners in the same process share a restraint manager resource because it is
 330     // part of the interface to the client code, which is associated only with the
 331     // original thread. Handles to the same resources can be obtained by copy.
 332     {
 333         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
 334     }
 335
 336     // Copy members of master runner.
 337     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 338     // Ref https://gitlab.com/gromacs/gromacs/-/issues/2587 and https://gitlab.com/gromacs/gromacs/-/issues/2375
 339     newRunner.hw_opt    = hw_opt;
 340     newRunner.filenames = filenames;
 341
 342     newRunner.oenv            = oenv;
 343     newRunner.mdrunOptions    = mdrunOptions;
 344     newRunner.domdecOptions   = domdecOptions;
 345     newRunner.nbpu_opt        = nbpu_opt;
 346     newRunner.pme_opt         = pme_opt;
 347     newRunner.pme_fft_opt     = pme_fft_opt;
 348     newRunner.bonded_opt      = bonded_opt;
 349     newRunner.update_opt      = update_opt;
 350     newRunner.nstlist_cmdline = nstlist_cmdline;
 351     newRunner.replExParams    = replExParams;
 352     newRunner.pforce          = pforce;
 353     // Give the spawned thread the newly created valid communicator
 354     // for the simulation.
 355     newRunner.libraryWorldCommunicator = MPI_COMM_WORLD;
 356     newRunner.simulationCommunicator   = MPI_COMM_WORLD;
 357     newRunner.ms                       = ms;
 358     newRunner.startingBehavior         = startingBehavior;
 359     newRunner.stopHandlerBuilder_      = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 360     newRunner.inputHolder_             = inputHolder_;
 361
 362     threadMpiMdrunnerAccessBarrier();
 363
 364     return newRunner;
 365 }
 366
 367 /*! \brief The callback used for running on spawned threads.
 368  *
 369  * Obtains the pointer to the master mdrunner object from the one
 370  * argument permitted to the thread-launch API call, copies it to make
 371  * a new runner for this thread, reinitializes necessary data, and
 372  * proceeds to the simulation. */
 373 static void mdrunner_start_fn(const void* arg)
 374 {
 375     try
 376     {
 377         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner*>(arg);
 378         /* copy the arg list to make sure that it's thread-local. This
 379            doesn't copy pointed-to items, of course; fnm, cr and fplog
 380            are reset in the call below, all others should be const. */
 381         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
 382         mdrunner.mdrunner();
 383     }
 384     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 385 }
 386
 387
 388 void Mdrunner::spawnThreads(int numThreadsToLaunch)
 389 {
 390 #if GMX_THREAD_MPI
 391     /* now spawn new threads that start mdrunner_start_fn(), while
 392        the main thread returns. Thread affinity is handled later. */
 393     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, mdrunner_start_fn,
 394                      static_cast<const void*>(this))
 395         != TMPI_SUCCESS)
 396     {
 397         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
 398     }
 399
 400     // Give the master thread the newly created valid communicator for
 401     // the simulation.
 402     libraryWorldCommunicator = MPI_COMM_WORLD;
 403     simulationCommunicator   = MPI_COMM_WORLD;
 404     threadMpiMdrunnerAccessBarrier();
 405 #else
 406     GMX_UNUSED_VALUE(numThreadsToLaunch);
 407     GMX_UNUSED_VALUE(mdrunner_start_fn);
 408 #endif
 409 }
 410
 411 } // namespace gmx
 412
 413 /*! \brief Initialize variables for Verlet scheme simulation */
 414 static void prepare_verlet_scheme(FILE*               fplog,
 415                                   t_commrec*          cr,
 416                                   t_inputrec*         ir,
 417                                   int                 nstlist_cmdline,
 418                                   const gmx_mtop_t*   mtop,
 419                                   const matrix        box,
 420                                   bool                makeGpuPairList,
 421                                   const gmx::CpuInfo& cpuinfo)
 422 {
 423     // We checked the cut-offs in grompp, but double-check here.
 424     // We have PME+LJcutoff kernels for rcoulomb>rvdw.
 425     if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == eelCUT)
 426     {
 427         GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw,
 428                            "With Verlet lists and PME we should have rcoulomb>=rvdw");
 429     }
 430     else
 431     {
 432         GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw,
 433                            "With Verlet lists and no PME rcoulomb and rvdw should be identical");
 434     }
 435     /* For NVE simulations, we will retain the initial list buffer */
 436     if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0 && !(EI_MD(ir->eI) && ir->etc == etcNO))
 437     {
 438         /* Update the Verlet buffer size for the current run setup */
 439
 440         /* Here we assume SIMD-enabled kernels are being used. But as currently
 441          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 442          * and 4x2 gives a larger buffer than 4x4, this is ok.
 443          */
 444         ListSetupType listType =
 445                 (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
 446         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 447
 448         const real rlist_new =
 449                 calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
 450
 451         if (rlist_new != ir->rlist)
 452         {
 453             if (fplog != nullptr)
 454             {
 455                 fprintf(fplog,
 456                         "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 457                         ir->rlist, rlist_new, listSetup.cluster_size_i, listSetup.cluster_size_j);
 458             }
 459             ir->rlist = rlist_new;
 460         }
 461     }
 462
 463     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 464     {
 465         gmx_fatal(FARGS, "Can not set nstlist without %s",
 466                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 467     }
 468
 469     if (EI_DYNAMICS(ir->eI))
 470     {
 471         /* Set or try nstlist values */
 472         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
 473     }
 474 }
 475
 476 /*! \brief Override the nslist value in inputrec
 477  *
 478  * with value passed on the command line (if any)
 479  */
 480 static void override_nsteps_cmdline(const gmx::MDLogger& mdlog, int64_t nsteps_cmdline, t_inputrec* ir)
 481 {
 482     assert(ir);
 483
 484     /* override with anything else than the default -2 */
 485     if (nsteps_cmdline > -2)
 486     {
 487         char sbuf_steps[STEPSTRSIZE];
 488         char sbuf_msg[STRLEN];
 489
 490         ir->nsteps = nsteps_cmdline;
 491         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
 492         {
 493             sprintf(sbuf_msg,
 494                     "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
 495                     gmx_step_str(nsteps_cmdline, sbuf_steps), fabs(nsteps_cmdline * ir->delta_t));
 496         }
 497         else
 498         {
 499             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
 500                     gmx_step_str(nsteps_cmdline, sbuf_steps));
 501         }
 502
 503         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
 504     }
 505     else if (nsteps_cmdline < -2)
 506     {
 507         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64, nsteps_cmdline);
 508     }
 509     /* Do nothing if nsteps_cmdline == -2 */
 510 }
 511
 512 namespace gmx
 513 {
 514
 515 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
 516  *
 517  * If not, and if a warning may be issued, logs a warning about
 518  * falling back to CPU code. With thread-MPI, only the first
 519  * call to this function should have \c issueWarning true. */
 520 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger& mdlog, const t_inputrec& ir, bool issueWarning)
 521 {
 522     bool        gpuIsUseful = true;
 523     std::string warning;
 524
 525     if (ir.opts.ngener - ir.nwall > 1)
 526     {
 527         /* The GPU code does not support more than one energy group.
 528          * If the user requested GPUs explicitly, a fatal error is given later.
 529          */
 530         gpuIsUseful = false;
 531         warning =
 532                 "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
 533                 "For better performance, run on the GPU without energy groups and then do "
 534                 "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
 535     }
 536
 537     if (EI_TPI(ir.eI))
 538     {
 539         gpuIsUseful = false;
 540         warning     = "TPI is not implemented for GPUs.";
 541     }
 542
 543     if (!gpuIsUseful && issueWarning)
 544     {
 545         GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
 546     }
 547
 548     return gpuIsUseful;
 549 }
 550
 551 //! Initializes the logger for mdrun.
 552 static gmx::LoggerOwner buildLogger(FILE* fplog, const bool isSimulationMasterRank)
 553 {
 554     gmx::LoggerBuilder builder;
 555     if (fplog != nullptr)
 556     {
 557         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
 558     }
 559     if (isSimulationMasterRank)
 560     {
 561         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
 562     }
 563     return builder.build();
 564 }
 565
 566 //! Make a TaskTarget from an mdrun argument string.
 567 static TaskTarget findTaskTarget(const char* optionString)
 568 {
 569     TaskTarget returnValue = TaskTarget::Auto;
 570
 571     if (strncmp(optionString, "auto", 3) == 0)
 572     {
 573         returnValue = TaskTarget::Auto;
 574     }
 575     else if (strncmp(optionString, "cpu", 3) == 0)
 576     {
 577         returnValue = TaskTarget::Cpu;
 578     }
 579     else if (strncmp(optionString, "gpu", 3) == 0)
 580     {
 581         returnValue = TaskTarget::Gpu;
 582     }
 583     else
 584     {
 585         GMX_ASSERT(false, "Option string should have been checked for sanity already");
 586     }
 587
 588     return returnValue;
 589 }
 590
 591 //! Finish run, aggregate data to print performance info.
 592 static void finish_run(FILE*                     fplog,
 593                        const gmx::MDLogger&      mdlog,
 594                        const t_commrec*          cr,
 595                        const t_inputrec*         inputrec,
 596                        t_nrnb                    nrnb[],
 597                        gmx_wallcycle_t           wcycle,
 598                        gmx_walltime_accounting_t walltime_accounting,
 599                        nonbonded_verlet_t*       nbv,
 600                        const gmx_pme_t*          pme,
 601                        gmx_bool                  bWriteStat)
 602 {
 603     double delta_t = 0;
 604     double nbfs = 0, mflop = 0;
 605     double elapsed_time, elapsed_time_over_all_ranks, elapsed_time_over_all_threads,
 606             elapsed_time_over_all_threads_over_all_ranks;
 607     /* Control whether it is valid to print a report. Only the
 608        simulation master may print, but it should not do so if the run
 609        terminated e.g. before a scheduled reset step. This is
 610        complicated by the fact that PME ranks are unaware of the
 611        reason why they were sent a pmerecvqxFINISH. To avoid
 612        communication deadlocks, we always do the communication for the
 613        report, even if we've decided not to write the report, because
 614        how long it takes to finish the run is not important when we've
 615        decided not to report on the simulation performance.
 616
 617        Further, we only report performance for dynamical integrators,
 618        because those are the only ones for which we plan to
 619        consider doing any optimizations. */
 620     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
 621
 622     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
 623     {
 624         GMX_LOG(mdlog.warning)
 625                 .asParagraph()
 626                 .appendText("Simulation ended prematurely, no performance report will be written.");
 627         printReport = false;
 628     }
 629
 630     t_nrnb*                 nrnb_tot;
 631     std::unique_ptr<t_nrnb> nrnbTotalStorage;
 632     if (cr->nnodes > 1)
 633     {
 634         nrnbTotalStorage = std::make_unique<t_nrnb>();
 635         nrnb_tot         = nrnbTotalStorage.get();
 636 #if GMX_MPI
 637         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
 638 #endif
 639     }
 640     else
 641     {
 642         nrnb_tot = nrnb;
 643     }
 644
 645     elapsed_time = walltime_accounting_get_time_since_reset(walltime_accounting);
 646     elapsed_time_over_all_threads =
 647             walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
 648     if (cr->nnodes > 1)
 649     {
 650 #if GMX_MPI
 651         /* reduce elapsed_time over all MPI ranks in the current simulation */
 652         MPI_Allreduce(&elapsed_time, &elapsed_time_over_all_ranks, 1, MPI_DOUBLE, MPI_SUM,
 653                       cr->mpi_comm_mysim);
 654         elapsed_time_over_all_ranks /= cr->nnodes;
 655         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
 656          * current simulation. */
 657         MPI_Allreduce(&elapsed_time_over_all_threads, &elapsed_time_over_all_threads_over_all_ranks,
 658                       1, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
 659 #endif
 660     }
 661     else
 662     {
 663         elapsed_time_over_all_ranks                  = elapsed_time;
 664         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
 665     }
 666
 667     if (printReport)
 668     {
 669         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
 670     }
 671
 672     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
 673     {
 674         print_dd_statistics(cr, inputrec, fplog);
 675     }
 676
 677     /* TODO Move the responsibility for any scaling by thread counts
 678      * to the code that handled the thread region, so that there's a
 679      * mechanism to keep cycle counting working during the transition
 680      * to task parallelism. */
 681     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 682     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
 683     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP),
 684                                    nthreads_pp, nthreads_pme);
 685     auto cycle_sum(wallcycle_sum(cr, wcycle));
 686
 687     if (printReport)
 688     {
 689         auto nbnxn_gpu_timings =
 690                 (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
 691         gmx_wallclock_gpu_pme_t pme_gpu_timings = {};
 692
 693         if (pme_gpu_task_enabled(pme))
 694         {
 695             pme_gpu_get_timings(pme, &pme_gpu_timings);
 696         }
 697         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
 698                         elapsed_time_over_all_ranks, wcycle, cycle_sum, nbnxn_gpu_timings,
 699                         &pme_gpu_timings);
 700
 701         if (EI_DYNAMICS(inputrec->eI))
 702         {
 703             delta_t = inputrec->delta_t;
 704         }
 705
 706         if (fplog)
 707         {
 708             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
 709                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 710                        delta_t, nbfs, mflop);
 711         }
 712         if (bWriteStat)
 713         {
 714             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
 715                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 716                        delta_t, nbfs, mflop);
 717         }
 718     }
 719 }
 720
 721 int Mdrunner::mdrunner()
 722 {
 723     matrix                    box;
 724     t_forcerec*               fr               = nullptr;
 725     real                      ewaldcoeff_q     = 0;
 726     real                      ewaldcoeff_lj    = 0;
 727     int                       nChargePerturbed = -1, nTypePerturbed = 0;
 728     gmx_wallcycle_t           wcycle;
 729     gmx_walltime_accounting_t walltime_accounting = nullptr;
 730     MembedHolder              membedHolder(filenames.size(), filenames.data());
 731     gmx_hw_info_t*            hwinfo = nullptr;
 732
 733     /* CAUTION: threads may be started later on in this function, so
 734        cr doesn't reflect the final parallel state right now */
 735     gmx_mtop_t mtop;
 736
 737     /* TODO: inputrec should tell us whether we use an algorithm, not a file option */
 738     const bool doEssentialDynamics = opt2bSet("-ei", filenames.size(), filenames.data());
 739     const bool doRerun             = mdrunOptions.rerun;
 740
 741     // Handle task-assignment related user options.
 742     EmulateGpuNonbonded emulateGpuNonbonded =
 743             (getenv("GMX_EMULATE_GPU") != nullptr ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
 744
 745     std::vector<int> userGpuTaskAssignment;
 746     try
 747     {
 748         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
 749     }
 750     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 751     auto nonbondedTarget = findTaskTarget(nbpu_opt);
 752     auto pmeTarget       = findTaskTarget(pme_opt);
 753     auto pmeFftTarget    = findTaskTarget(pme_fft_opt);
 754     auto bondedTarget    = findTaskTarget(bonded_opt);
 755     auto updateTarget    = findTaskTarget(update_opt);
 756
 757     FILE* fplog = nullptr;
 758     // If we are appending, we don't write log output because we need
 759     // to check that the old log file matches what the checkpoint file
 760     // expects. Otherwise, we should start to write log output now if
 761     // there is a file ready for it.
 762     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
 763     {
 764         fplog = gmx_fio_getfp(logFileHandle);
 765     }
 766     const bool isSimulationMasterRank = findIsSimulationMasterRank(ms, simulationCommunicator);
 767     gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
 768     gmx::MDLogger    mdlog(logOwner.logger());
 769
 770     // TODO The thread-MPI master rank makes a working
 771     // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
 772     // after the threads have been launched. This works because no use
 773     // is made of that communicator until after the execution paths
 774     // have rejoined. But it is likely that we can improve the way
 775     // this is expressed, e.g. by expressly running detection only the
 776     // master rank for thread-MPI, rather than relying on the mutex
 777     // and reference count.
 778     PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
 779     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 780
 781     gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
 782
 783     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->deviceInfoList, hw_opt.gpuIdsAvailable);
 784     const int        numDevicesToUse = gmx::ssize(gpuIdsToUse);
 785
 786     // Print citation requests after all software/hardware printing
 787     pleaseCiteGromacs(fplog);
 788
 789     // Note: legacy program logic relies on checking whether these pointers are assigned.
 790     // Objects may or may not be allocated later.
 791     std::unique_ptr<t_inputrec> inputrec;
 792     std::unique_ptr<t_state>    globalState;
 793
 794     auto partialDeserializedTpr = std::make_unique<PartialDeserializedTprFile>();
 795
 796     if (isSimulationMasterRank)
 797     {
 798         // Allocate objects to be initialized by later function calls.
 799         /* Only the master rank has the global state */
 800         globalState = std::make_unique<t_state>();
 801         inputrec    = std::make_unique<t_inputrec>();
 802
 803         /* Read (nearly) all data required for the simulation
 804          * and keep the partly serialized tpr contents to send to other ranks later
 805          */
 806         applyGlobalSimulationState(*inputHolder_.get(), partialDeserializedTpr.get(),
 807                                    globalState.get(), inputrec.get(), &mtop);
 808     }
 809
 810     /* Check and update the hardware options for internal consistency */
 811     checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks,
 812                                   inputrec.get());
 813
 814     if (GMX_THREAD_MPI && isSimulationMasterRank)
 815     {
 816         bool useGpuForNonbonded = false;
 817         bool useGpuForPme       = false;
 818         try
 819         {
 820             GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
 821
 822             // If the user specified the number of ranks, then we must
 823             // respect that, but in default mode, we need to allow for
 824             // the number of GPUs to choose the number of ranks.
 825             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 826             useGpuForNonbonded         = decideWhetherToUseGpusForNonbondedWithThreadMpi(
 827                     nonbondedTarget, numDevicesToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 828                     canUseGpuForNonbonded,
 829                     gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
 830                     hw_opt.nthreads_tmpi);
 831             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
 832                     useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo,
 833                     *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 834         }
 835         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 836
 837         /* Determine how many thread-MPI ranks to start.
 838          *
 839          * TODO Over-writing the user-supplied value here does
 840          * prevent any possible subsequent checks from working
 841          * correctly. */
 842         hw_opt.nthreads_tmpi =
 843                 get_nthreads_mpi(hwinfo, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
 844                                  inputrec.get(), &mtop, mdlog, membedHolder.doMembed());
 845
 846         // Now start the threads for thread MPI.
 847         spawnThreads(hw_opt.nthreads_tmpi);
 848         // The spawned threads enter mdrunner() and execution of
 849         // master and spawned threads joins at the end of this block.
 850         physicalNodeComm =
 851                 PhysicalNodeCommunicator(libraryWorldCommunicator, gmx_physicalnode_id_hash());
 852     }
 853
 854     GMX_RELEASE_ASSERT(ms || simulationCommunicator != MPI_COMM_NULL,
 855                        "Must have valid communicator unless running a multi-simulation");
 856     CommrecHandle crHandle = init_commrec(simulationCommunicator);
 857     t_commrec*    cr       = crHandle.get();
 858     GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
 859
 860     if (PAR(cr))
 861     {
 862         /* now broadcast everything to the non-master nodes/threads: */
 863         if (!isSimulationMasterRank)
 864         {
 865             // Until now, only the master rank has a non-null pointer.
 866             // On non-master ranks, allocate the object that will receive data in the following call.
 867             inputrec = std::make_unique<t_inputrec>();
 868         }
 869         init_parallel(cr->mpiDefaultCommunicator, MASTER(cr), inputrec.get(), &mtop,
 870                       partialDeserializedTpr.get());
 871     }
 872     GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
 873     partialDeserializedTpr.reset(nullptr);
 874
 875     // Now the number of ranks is known to all ranks, and each knows
 876     // the inputrec read by the master rank. The ranks can now all run
 877     // the task-deciding functions and will agree on the result
 878     // without needing to communicate.
 879     const bool useDomainDecomposition = (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == eiNM));
 880
 881     // Note that these variables describe only their own node.
 882     //
 883     // Note that when bonded interactions run on a GPU they always run
 884     // alongside a nonbonded task, so do not influence task assignment
 885     // even though they affect the force calculation workload.
 886     bool useGpuForNonbonded = false;
 887     bool useGpuForPme       = false;
 888     bool useGpuForBonded    = false;
 889     bool useGpuForUpdate    = false;
 890     bool gpusWereDetected   = hwinfo->ngpu_compatible_tot > 0;
 891     try
 892     {
 893         // It's possible that there are different numbers of GPUs on
 894         // different nodes, which is the user's responsibility to
 895         // handle. If unsuitable, we will notice that during task
 896         // assignment.
 897         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 898         useGpuForNonbonded         = decideWhetherToUseGpusForNonbonded(
 899                 nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded,
 900                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected);
 901         useGpuForPme = decideWhetherToUseGpusForPme(
 902                 useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec,
 903                 cr->sizeOfDefaultCommunicator, domdecOptions.numPmeRanks, gpusWereDetected);
 904         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr)
 905                                   && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
 906         useGpuForBonded = decideWhetherToUseGpusForBonded(
 907                 useGpuForNonbonded, useGpuForPme, bondedTarget, canUseGpuForBonded,
 908                 EVDW_PME(inputrec->vdwtype), EEL_PME_EWALD(inputrec->coulombtype),
 909                 domdecOptions.numPmeRanks, gpusWereDetected);
 910     }
 911     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 912
 913     const PmeRunMode pmeRunMode = determinePmeRunMode(useGpuForPme, pmeFftTarget, *inputrec);
 914
 915     // Initialize development feature flags that enabled by environment variable
 916     // and report those features that are enabled.
 917     const DevelopmentFeatureFlags devFlags =
 918             manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode);
 919
 920     const bool useModularSimulator =
 921             checkUseModularSimulator(false, inputrec.get(), doRerun, mtop, ms, replExParams,
 922                                      nullptr, doEssentialDynamics, membedHolder.doMembed());
 923
 924     // Build restraints.
 925     // TODO: hide restraint implementation details from Mdrunner.
 926     // There is nothing unique about restraints at this point as far as the
 927     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
 928     // factory functions from the SimulationContext on which to call mdModules_->add().
 929     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
 930     for (auto&& restraint : restraintManager_->getRestraints())
 931     {
 932         auto module = RestraintMDModule::create(restraint, restraint->sites());
 933         mdModules_->add(std::move(module));
 934     }
 935
 936     // TODO: Error handling
 937     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 938     // now that the MdModules know their options, they know which callbacks to sign up to
 939     mdModules_->subscribeToSimulationSetupNotifications();
 940     const auto& mdModulesNotifier = mdModules_->notifier().simulationSetupNotifications_;
 941
 942     if (inputrec->internalParameters != nullptr)
 943     {
 944         mdModulesNotifier.notify(*inputrec->internalParameters);
 945     }
 946
 947     if (fplog != nullptr)
 948     {
 949         pr_inputrec(fplog, 0, "Input Parameters", inputrec.get(), FALSE);
 950         fprintf(fplog, "\n");
 951     }
 952
 953     if (SIMMASTER(cr))
 954     {
 955         /* In rerun, set velocities to zero if present */
 956         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
 957         {
 958             // rerun does not use velocities
 959             GMX_LOG(mdlog.info)
 960                     .asParagraph()
 961                     .appendText(
 962                             "Rerun trajectory contains velocities. Rerun does only evaluate "
 963                             "potential energy and forces. The velocities will be ignored.");
 964             for (int i = 0; i < globalState->natoms; i++)
 965             {
 966                 clear_rvec(globalState->v[i]);
 967             }
 968             globalState->flags &= ~(1 << estV);
 969         }
 970
 971         /* now make sure the state is initialized and propagated */
 972         set_state_entries(globalState.get(), inputrec.get(), useModularSimulator);
 973     }
 974
 975     /* NM and TPI parallelize over force/energy calculations, not atoms,
 976      * so we need to initialize and broadcast the global state.
 977      */
 978     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
 979     {
 980         if (!MASTER(cr))
 981         {
 982             globalState = std::make_unique<t_state>();
 983         }
 984         broadcastStateWithoutDynamics(cr->mpiDefaultCommunicator, DOMAINDECOMP(cr), PAR(cr),
 985                                       globalState.get());
 986     }
 987
 988     /* A parallel command line option consistency check that we can
 989        only do after any threads have started. */
 990     if (!PAR(cr)
 991         && (domdecOptions.numCells[XX] > 1 || domdecOptions.numCells[YY] > 1
 992             || domdecOptions.numCells[ZZ] > 1 || domdecOptions.numPmeRanks > 0))
 993     {
 994         gmx_fatal(FARGS,
 995                   "The -dd or -npme option request a parallel simulation, "
 996 #if !GMX_MPI
 997                   "but %s was compiled without threads or MPI enabled",
 998                   output_env_get_program_display_name(oenv));
 999 #elif GMX_THREAD_MPI
1000                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
1001 #else
1002                   "but %s was not started through mpirun/mpiexec or only one rank was requested "
1003                   "through mpirun/mpiexec",
1004                   output_env_get_program_display_name(oenv));
1005 #endif
1006     }
1007
1008     if (doRerun && (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
1009     {
1010         gmx_fatal(FARGS,
1011                   "The .mdp file specified an energy mininization or normal mode algorithm, and "
1012                   "these are not compatible with mdrun -rerun");
1013     }
1014
1015     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
1016     {
1017         if (domdecOptions.numPmeRanks > 0)
1018         {
1019             gmx_fatal_collective(FARGS, cr->mpiDefaultCommunicator, MASTER(cr),
1020                                  "PME-only ranks are requested, but the system does not use PME "
1021                                  "for electrostatics or LJ");
1022         }
1023
1024         domdecOptions.numPmeRanks = 0;
1025     }
1026
1027     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
1028     {
1029         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
1030          * improve performance with many threads per GPU, since our OpenMP
1031          * scaling is bad, but it's difficult to automate the setup.
1032          */
1033         domdecOptions.numPmeRanks = 0;
1034     }
1035     if (useGpuForPme)
1036     {
1037         if (domdecOptions.numPmeRanks < 0)
1038         {
1039             domdecOptions.numPmeRanks = 0;
1040             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
1041         }
1042         else
1043         {
1044             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1,
1045                                "PME GPU decomposition is not supported");
1046         }
1047     }
1048
1049 #if GMX_FAHCORE
1050     if (MASTER(cr))
1051     {
1052         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
1053     }
1054 #endif
1055
1056     /* NMR restraints must be initialized before load_checkpoint,
1057      * since with time averaging the history is added to t_state.
1058      * For proper consistency check we therefore need to extend
1059      * t_state here.
1060      * So the PME-only nodes (if present) will also initialize
1061      * the distance restraints.
1062      */
1063
1064     /* This needs to be called before read_checkpoint to extend the state */
1065     t_disresdata* disresdata;
1066     snew(disresdata, 1);
1067     init_disres(fplog, &mtop, inputrec.get(), DisResRunMode::MDRun,
1068                 MASTER(cr) ? DDRole::Master : DDRole::Agent,
1069                 PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mysim, ms, disresdata,
1070                 globalState.get(), replExParams.exchangeInterval > 0);
1071
1072     t_oriresdata* oriresdata;
1073     snew(oriresdata, 1);
1074     init_orires(fplog, &mtop, inputrec.get(), cr, ms, globalState.get(), oriresdata);
1075
1076     auto deform = prepareBoxDeformation(
1077             globalState != nullptr ? globalState->box : box, MASTER(cr) ? DDRole::Master : DDRole::Agent,
1078             PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mygroup, *inputrec);
1079
1080     ObservablesHistory observablesHistory = {};
1081
1082     auto modularSimulatorCheckpointData = std::make_unique<ReadCheckpointDataHolder>();
1083     if (startingBehavior != StartingBehavior::NewSimulation)
1084     {
1085         /* Check if checkpoint file exists before doing continuation.
1086          * This way we can use identical input options for the first and subsequent runs...
1087          */
1088         if (mdrunOptions.numStepsCommandline > -2)
1089         {
1090             /* Temporarily set the number of steps to unlimited to avoid
1091              * triggering the nsteps check in load_checkpoint().
1092              * This hack will go away soon when the -nsteps option is removed.
1093              */
1094             inputrec->nsteps = -1;
1095         }
1096
1097         // Finish applying initial simulation state information from external sources on all ranks.
1098         // Reconcile checkpoint file data with Mdrunner state established up to this point.
1099         applyLocalState(*inputHolder_.get(), logFileHandle, cr, domdecOptions.numCells,
1100                         inputrec.get(), globalState.get(), &observablesHistory,
1101                         mdrunOptions.reproducible, mdModules_->notifier(),
1102                         modularSimulatorCheckpointData.get(), useModularSimulator);
1103         // TODO: (#3652) Synchronize filesystem state, SimulationInput contents, and program
1104         //  invariants
1105         //  on all code paths.
1106         // Write checkpoint or provide hook to update SimulationInput.
1107         // If there was a checkpoint file, SimulationInput contains more information
1108         // than if there wasn't. At this point, we have synchronized the in-memory
1109         // state with the filesystem state only for restarted simulations. We should
1110         // be calling applyLocalState unconditionally and expect that the completeness
1111         // of SimulationInput is not dependent on its creation method.
1112
1113         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
1114         {
1115             // Now we can start normal logging to the truncated log file.
1116             fplog = gmx_fio_getfp(logFileHandle);
1117             prepareLogAppending(fplog);
1118             logOwner = buildLogger(fplog, MASTER(cr));
1119             mdlog    = logOwner.logger();
1120         }
1121     }
1122
1123     if (mdrunOptions.numStepsCommandline > -2)
1124     {
1125         GMX_LOG(mdlog.info)
1126                 .asParagraph()
1127                 .appendText(
1128                         "The -nsteps functionality is deprecated, and may be removed in a future "
1129                         "version. "
1130                         "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp "
1131                         "file field.");
1132     }
1133     /* override nsteps with value set on the commandline */
1134     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec.get());
1135
1136     if (isSimulationMasterRank)
1137     {
1138         copy_mat(globalState->box, box);
1139     }
1140
1141     if (PAR(cr))
1142     {
1143         gmx_bcast(sizeof(box), box, cr->mpiDefaultCommunicator);
1144     }
1145
1146     if (inputrec->cutoff_scheme != ecutsVERLET)
1147     {
1148         gmx_fatal(FARGS,
1149                   "This group-scheme .tpr file can no longer be run by mdrun. Please update to the "
1150                   "Verlet scheme, or use an earlier version of GROMACS if necessary.");
1151     }
1152     /* Update rlist and nstlist. */
1153     /* Note: prepare_verlet_scheme is calling increaseNstlist(...), which (while attempting to
1154      * increase rlist) tries to check if the newly chosen value fits with the DD scheme. As this is
1155      * run before any DD scheme is set up, this check is never executed. See #3334 for more details.
1156      */
1157     prepare_verlet_scheme(fplog, cr, inputrec.get(), nstlist_cmdline, &mtop, box,
1158                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
1159                           *hwinfo->cpuInfo);
1160
1161     // This builder is necessary while we have multi-part construction
1162     // of DD. Before DD is constructed, we use the existence of
1163     // the builder object to indicate that further construction of DD
1164     // is needed.
1165     std::unique_ptr<DomainDecompositionBuilder> ddBuilder;
1166     if (useDomainDecomposition)
1167     {
1168         ddBuilder = std::make_unique<DomainDecompositionBuilder>(
1169                 mdlog, cr, domdecOptions, mdrunOptions, mtop, *inputrec, box,
1170                 positionsFromStatePointer(globalState.get()));
1171     }
1172     else
1173     {
1174         /* PME, if used, is done on all nodes with 1D decomposition */
1175         cr->nnodes     = cr->sizeOfDefaultCommunicator;
1176         cr->sim_nodeid = cr->rankInDefaultCommunicator;
1177         cr->nodeid     = cr->rankInDefaultCommunicator;
1178         cr->npmenodes  = 0;
1179         cr->duty       = (DUTY_PP | DUTY_PME);
1180
1181         if (inputrec->pbcType == PbcType::Screw)
1182         {
1183             gmx_fatal(FARGS, "pbc=screw is only implemented with domain decomposition");
1184         }
1185     }
1186
1187     // Produce the task assignment for this rank - done after DD is constructed
1188     GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build(
1189             gpuIdsToUse, userGpuTaskAssignment, *hwinfo, simulationCommunicator, physicalNodeComm,
1190             nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
1191             useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
1192             // TODO cr->duty & DUTY_PME should imply that a PME
1193             // algorithm is active, but currently does not.
1194             EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
1195
1196     // Get the device handles for the modules, nullptr when no task is assigned.
1197     int                deviceId   = -1;
1198     DeviceInformation* deviceInfo = gpuTaskAssignments.initDevice(&deviceId);
1199
1200     // timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?)
1201     bool useTiming = true;
1202
1203     if (GMX_GPU_CUDA)
1204     {
1205         /* WARNING: CUDA timings are incorrect with multiple streams.
1206          *          This is the main reason why they are disabled by default.
1207          */
1208         // TODO: Consider turning on by default when we can detect nr of streams.
1209         useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
1210     }
1211     else if (GMX_GPU_OPENCL)
1212     {
1213         useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
1214     }
1215
1216     // TODO Currently this is always built, yet DD partition code
1217     // checks if it is built before using it. Probably it should
1218     // become an MDModule that is made only when another module
1219     // requires it (e.g. pull, CompEl, density fitting), so that we
1220     // don't update the local atom sets unilaterally every step.
1221     LocalAtomSetManager atomSets;
1222     if (ddBuilder)
1223     {
1224         // TODO Pass the GPU streams to ddBuilder to use in buffer
1225         // transfers (e.g. halo exchange)
1226         cr->dd = ddBuilder->build(&atomSets);
1227         // The builder's job is done, so destruct it
1228         ddBuilder.reset(nullptr);
1229         // Note that local state still does not exist yet.
1230     }
1231
1232     // The GPU update is decided here because we need to know whether the constraints or
1233     // SETTLEs can span accross the domain borders (i.e. whether or not update groups are
1234     // defined). This is only known after DD is initialized, hence decision on using GPU
1235     // update is done so late.
1236     try
1237     {
1238         const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false;
1239
1240         useGpuForUpdate = decideWhetherToUseGpuForUpdate(
1241                 useDomainDecomposition, useUpdateGroups, pmeRunMode, domdecOptions.numPmeRanks > 0,
1242                 useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop,
1243                 doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
1244                 replExParams.exchangeInterval > 0, doRerun, devFlags, mdlog);
1245     }
1246     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1247
1248     const bool printHostName = (cr->nnodes > 1);
1249     gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
1250
1251     const bool disableNonbondedCalculation = (getenv("GMX_NO_NONBONDED") != nullptr);
1252     if (disableNonbondedCalculation)
1253     {
1254         /* turn off non-bonded calculations */
1255         GMX_LOG(mdlog.warning)
1256                 .asParagraph()
1257                 .appendText(
1258                         "Found environment variable GMX_NO_NONBONDED.\n"
1259                         "Disabling nonbonded calculations.");
1260     }
1261
1262     MdrunScheduleWorkload runScheduleWork;
1263
1264     bool useGpuDirectHalo = decideWhetherToUseGpuForHalo(
1265             devFlags, havePPDomainDecomposition(cr), useGpuForNonbonded, useModularSimulator,
1266             doRerun, EI_ENERGY_MINIMIZATION(inputrec->eI));
1267
1268     // Also populates the simulation constant workload description.
1269     runScheduleWork.simulationWork = createSimulationWorkload(
1270             *inputrec, disableNonbondedCalculation, devFlags, useGpuForNonbonded, pmeRunMode,
1271             useGpuForBonded, useGpuForUpdate, useGpuDirectHalo);
1272
1273     std::unique_ptr<DeviceStreamManager> deviceStreamManager = nullptr;
1274
1275     if (deviceInfo != nullptr)
1276     {
1277         if (DOMAINDECOMP(cr) && thisRankHasDuty(cr, DUTY_PP))
1278         {
1279             dd_setup_dlb_resource_sharing(cr, deviceId);
1280         }
1281         deviceStreamManager = std::make_unique<DeviceStreamManager>(
1282                 *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming);
1283     }
1284
1285     // If the user chose a task assignment, give them some hints
1286     // where appropriate.
1287     if (!userGpuTaskAssignment.empty())
1288     {
1289         gpuTaskAssignments.logPerformanceHints(mdlog, numDevicesToUse);
1290     }
1291
1292     if (PAR(cr))
1293     {
1294         /* After possible communicator splitting in make_dd_communicators.
1295          * we can set up the intra/inter node communication.
1296          */
1297         gmx_setup_nodecomm(fplog, cr);
1298     }
1299
1300 #if GMX_MPI
1301     if (isMultiSim(ms))
1302     {
1303         GMX_LOG(mdlog.warning)
1304                 .asParagraph()
1305                 .appendTextFormatted(
1306                         "This is simulation %d out of %d running as a composite GROMACS\n"
1307                         "multi-simulation job. Setup for this simulation:\n",
1308                         ms->simulationIndex_, ms->numSimulations_);
1309     }
1310     GMX_LOG(mdlog.warning)
1311             .appendTextFormatted("Using %d MPI %s\n", cr->nnodes,
1312 #    if GMX_THREAD_MPI
1313                                  cr->nnodes == 1 ? "thread" : "threads"
1314 #    else
1315                                  cr->nnodes == 1 ? "process" : "processes"
1316 #    endif
1317             );
1318     fflush(stderr);
1319 #endif
1320
1321     // If mdrun -pin auto honors any affinity setting that already
1322     // exists. If so, it is nice to provide feedback about whether
1323     // that existing affinity setting was from OpenMP or something
1324     // else, so we run this code both before and after we initialize
1325     // the OpenMP support.
1326     gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
1327     /* Check and update the number of OpenMP threads requested */
1328     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1329                                             pmeRunMode, mtop, *inputrec);
1330
1331     gmx_omp_nthreads_init(mdlog, cr, hwinfo->nthreads_hw_avail, physicalNodeComm.size_,
1332                           hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP));
1333
1334     // Enable FP exception detection, but not in
1335     // Release mode and not for compilers with known buggy FP
1336     // exception support (clang with any optimization) or suspected
1337     // buggy FP exception support (gcc 7.* with optimization).
1338 #if !defined NDEBUG                                                                         \
1339         && !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1340              && defined __OPTIMIZE__)
1341     const bool bEnableFPE = true;
1342 #else
1343     const bool bEnableFPE = false;
1344 #endif
1345     // FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1346     if (bEnableFPE)
1347     {
1348         gmx_feenableexcept();
1349     }
1350
1351     /* Now that we know the setup is consistent, check for efficiency */
1352     check_resource_division_efficiency(hwinfo, gpuTaskAssignments.thisRankHasAnyGpuTask(),
1353                                        mdrunOptions.ntompOptionIsSet, cr, mdlog);
1354
1355     /* getting number of PP/PME threads on this MPI / tMPI rank.
1356        PME: env variable should be read only on one node to make sure it is
1357        identical everywhere;
1358      */
1359     const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded)
1360                                                                   : gmx_omp_nthreads_get(emntPME);
1361     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo->hardwareTopology,
1362                                   physicalNodeComm, mdlog);
1363
1364     // Enable Peer access between GPUs where available
1365     // Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus
1366     // any of the GPU communication features are active.
1367     if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI
1368         && (runScheduleWork.simulationWork.useGpuHaloExchange
1369             || runScheduleWork.simulationWork.useGpuPmePpCommunication))
1370     {
1371         setupGpuDevicePeerAccess(gpuIdsToUse, mdlog);
1372     }
1373
1374     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1375     {
1376         /* Before setting affinity, check whether the affinity has changed
1377          * - which indicates that probably the OpenMP library has changed it
1378          * since we first checked).
1379          */
1380         gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1381
1382         int numThreadsOnThisNode, intraNodeThreadOffset;
1383         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1384                                  &intraNodeThreadOffset);
1385
1386         /* Set the CPU affinity */
1387         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology, numThreadsOnThisRank,
1388                                 numThreadsOnThisNode, intraNodeThreadOffset, nullptr);
1389     }
1390
1391     if (mdrunOptions.timingOptions.resetStep > -1)
1392     {
1393         GMX_LOG(mdlog.info)
1394                 .asParagraph()
1395                 .appendText(
1396                         "The -resetstep functionality is deprecated, and may be removed in a "
1397                         "future version.");
1398     }
1399     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1400
1401     if (PAR(cr))
1402     {
1403         /* Master synchronizes its value of reset_counters with all nodes
1404          * including PME only nodes */
1405         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1406         gmx_bcast(sizeof(reset_counters), &reset_counters, cr->mpi_comm_mysim);
1407         wcycle_set_reset_counters(wcycle, reset_counters);
1408     }
1409
1410     // Membrane embedding must be initialized before we call init_forcerec()
1411     membedHolder.initializeMembed(fplog, filenames.size(), filenames.data(), &mtop, inputrec.get(),
1412                                   globalState.get(), cr, &mdrunOptions.checkpointOptions.period);
1413
1414     const bool               thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
1415     std::unique_ptr<MDAtoms> mdAtoms;
1416     std::unique_ptr<VirtualSitesHandler> vsite;
1417     std::unique_ptr<GpuBonded>           gpuBonded;
1418
1419     t_nrnb nrnb;
1420     if (thisRankHasDuty(cr, DUTY_PP))
1421     {
1422         mdModulesNotifier.notify(*cr);
1423         mdModulesNotifier.notify(&atomSets);
1424         mdModulesNotifier.notify(inputrec->pbcType);
1425         mdModulesNotifier.notify(SimulationTimeStep{ inputrec->delta_t });
1426         /* Initiate forcerecord */
1427         fr                 = new t_forcerec;
1428         fr->forceProviders = mdModules_->initForceProviders();
1429         init_forcerec(fplog, mdlog, fr, inputrec.get(), &mtop, cr, box,
1430                       opt2fn("-table", filenames.size(), filenames.data()),
1431                       opt2fn("-tablep", filenames.size(), filenames.data()),
1432                       opt2fns("-tableb", filenames.size(), filenames.data()), pforce);
1433         // Dirty hack, for fixing disres and orires should be made mdmodules
1434         fr->fcdata->disres = disresdata;
1435         fr->fcdata->orires = oriresdata;
1436
1437         // Save a handle to device stream manager to use elsewhere in the code
1438         // TODO: Forcerec is not a correct place to store it.
1439         fr->deviceStreamManager = deviceStreamManager.get();
1440
1441         if (runScheduleWork.simulationWork.useGpuPmePpCommunication && !thisRankHasDuty(cr, DUTY_PME))
1442         {
1443             GMX_RELEASE_ASSERT(
1444                     deviceStreamManager != nullptr,
1445                     "GPU device stream manager should be valid in order to use PME-PP direct "
1446                     "communications.");
1447             GMX_RELEASE_ASSERT(
1448                     deviceStreamManager->streamIsValid(DeviceStreamType::PmePpTransfer),
1449                     "GPU PP-PME stream should be valid in order to use GPU PME-PP direct "
1450                     "communications.");
1451             fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(
1452                     cr->mpi_comm_mysim, cr->dd->pme_nodeid, deviceStreamManager->context(),
1453                     deviceStreamManager->stream(DeviceStreamType::PmePpTransfer));
1454         }
1455
1456         fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo,
1457                                         runScheduleWork.simulationWork.useGpuNonbonded,
1458                                         deviceStreamManager.get(), &mtop, box, wcycle);
1459         // TODO: Move the logic below to a GPU bonded builder
1460         if (runScheduleWork.simulationWork.useGpuBonded)
1461         {
1462             GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
1463                                "GPU device stream manager should be valid in order to use GPU "
1464                                "version of bonded forces.");
1465             gpuBonded = std::make_unique<GpuBonded>(
1466                     mtop.ffparams, fr->ic->epsfac * fr->fudgeQQ, deviceStreamManager->context(),
1467                     deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)), wcycle);
1468             fr->gpuBonded = gpuBonded.get();
1469         }
1470
1471         /* Initialize the mdAtoms structure.
1472          * mdAtoms is not filled with atom data,
1473          * as this can not be done now with domain decomposition.
1474          */
1475         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1476         if (globalState && thisRankHasPmeGpuTask)
1477         {
1478             // The pinning of coordinates in the global state object works, because we only use
1479             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1480             // points to the global state object without DD.
1481             // FIXME: MD and EM separately set up the local state - this should happen in the same
1482             // function, which should also perform the pinning.
1483             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1484         }
1485
1486         /* Initialize the virtual site communication */
1487         vsite = makeVirtualSitesHandler(mtop, cr, fr->pbcType);
1488
1489         calc_shifts(box, fr->shift_vec);
1490
1491         /* With periodic molecules the charge groups should be whole at start up
1492          * and the virtual sites should not be far from their proper positions.
1493          */
1494         if (!inputrec->bContinuation && MASTER(cr)
1495             && !(inputrec->pbcType != PbcType::No && inputrec->bPeriodicMols))
1496         {
1497             /* Make molecules whole at start of run */
1498             if (fr->pbcType != PbcType::No)
1499             {
1500                 do_pbc_first_mtop(fplog, inputrec->pbcType, box, &mtop, globalState->x.rvec_array());
1501             }
1502             if (vsite)
1503             {
1504                 /* Correct initial vsite positions are required
1505                  * for the initial distribution in the domain decomposition
1506                  * and for the initial shell prediction.
1507                  */
1508                 constructVirtualSitesGlobal(mtop, globalState->x);
1509             }
1510         }
1511
1512         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1513         {
1514             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1515             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1516         }
1517     }
1518     else
1519     {
1520         /* This is a PME only node */
1521
1522         GMX_ASSERT(globalState == nullptr,
1523                    "We don't need the state on a PME only rank and expect it to be unitialized");
1524
1525         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1526         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1527     }
1528
1529     gmx_pme_t* sepPmeData = nullptr;
1530     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1531     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr),
1532                "Double-checking that only PME-only ranks have no forcerec");
1533     gmx_pme_t*& pmedata = fr ? fr->pmedata : sepPmeData;
1534
1535     // TODO should live in ewald module once its testing is improved
1536     //
1537     // Later, this program could contain kernels that might be later
1538     // re-used as auto-tuning progresses, or subsequent simulations
1539     // are invoked.
1540     PmeGpuProgramStorage pmeGpuProgram;
1541     if (thisRankHasPmeGpuTask)
1542     {
1543         GMX_RELEASE_ASSERT(
1544                 (deviceStreamManager != nullptr),
1545                 "GPU device stream manager should be initialized in order to use GPU for PME.");
1546         GMX_RELEASE_ASSERT((deviceInfo != nullptr),
1547                            "GPU device should be initialized in order to use GPU for PME.");
1548         pmeGpuProgram = buildPmeGpuProgram(deviceStreamManager->context());
1549     }
1550
1551     /* Initiate PME if necessary,
1552      * either on all nodes or on dedicated PME nodes only. */
1553     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1554     {
1555         if (mdAtoms && mdAtoms->mdatoms())
1556         {
1557             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1558             if (EVDW_PME(inputrec->vdwtype))
1559             {
1560                 nTypePerturbed = mdAtoms->mdatoms()->nTypePerturbed;
1561             }
1562         }
1563         if (cr->npmenodes > 0)
1564         {
1565             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1566             gmx_bcast(sizeof(nChargePerturbed), &nChargePerturbed, cr->mpi_comm_mysim);
1567             gmx_bcast(sizeof(nTypePerturbed), &nTypePerturbed, cr->mpi_comm_mysim);
1568         }
1569
1570         if (thisRankHasDuty(cr, DUTY_PME))
1571         {
1572             try
1573             {
1574                 // TODO: This should be in the builder.
1575                 GMX_RELEASE_ASSERT(!runScheduleWork.simulationWork.useGpuPme
1576                                            || (deviceStreamManager != nullptr),
1577                                    "Device stream manager should be valid in order to use GPU "
1578                                    "version of PME.");
1579                 GMX_RELEASE_ASSERT(
1580                         !runScheduleWork.simulationWork.useGpuPme
1581                                 || deviceStreamManager->streamIsValid(DeviceStreamType::Pme),
1582                         "GPU PME stream should be valid in order to use GPU version of PME.");
1583
1584                 const DeviceContext* deviceContext = runScheduleWork.simulationWork.useGpuPme
1585                                                              ? &deviceStreamManager->context()
1586                                                              : nullptr;
1587                 const DeviceStream* pmeStream =
1588                         runScheduleWork.simulationWork.useGpuPme
1589                                 ? &deviceStreamManager->stream(DeviceStreamType::Pme)
1590                                 : nullptr;
1591
1592                 pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec.get(),
1593                                        nChargePerturbed != 0, nTypePerturbed != 0,
1594                                        mdrunOptions.reproducible, ewaldcoeff_q, ewaldcoeff_lj,
1595                                        gmx_omp_nthreads_get(emntPME), pmeRunMode, nullptr,
1596                                        deviceContext, pmeStream, pmeGpuProgram.get(), mdlog);
1597             }
1598             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1599         }
1600     }
1601
1602
1603     if (EI_DYNAMICS(inputrec->eI))
1604     {
1605         /* Turn on signal handling on all nodes */
1606         /*
1607          * (A user signal from the PME nodes (if any)
1608          * is communicated to the PP nodes.
1609          */
1610         signal_handler_install();
1611     }
1612
1613     pull_t* pull_work = nullptr;
1614     if (thisRankHasDuty(cr, DUTY_PP))
1615     {
1616         /* Assumes uniform use of the number of OpenMP threads */
1617         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1618
1619         if (inputrec->bPull)
1620         {
1621             /* Initialize pull code */
1622             pull_work = init_pull(fplog, inputrec->pull, inputrec.get(), &mtop, cr, &atomSets,
1623                                   inputrec->fepvals->init_lambda);
1624             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1625             {
1626                 initPullHistory(pull_work, &observablesHistory);
1627             }
1628             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1629             {
1630                 init_pull_output_files(pull_work, filenames.size(), filenames.data(), oenv, startingBehavior);
1631             }
1632         }
1633
1634         std::unique_ptr<EnforcedRotation> enforcedRotation;
1635         if (inputrec->bRot)
1636         {
1637             /* Initialize enforced rotation code */
1638             enforcedRotation = init_rot(fplog, inputrec.get(), filenames.size(), filenames.data(),
1639                                         cr, &atomSets, globalState.get(), &mtop, oenv, mdrunOptions,
1640                                         startingBehavior);
1641         }
1642
1643         t_swap* swap = nullptr;
1644         if (inputrec->eSwapCoords != eswapNO)
1645         {
1646             /* Initialize ion swapping code */
1647             swap = init_swapcoords(fplog, inputrec.get(),
1648                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1649                                    &mtop, globalState.get(), &observablesHistory, cr, &atomSets,
1650                                    oenv, mdrunOptions, startingBehavior);
1651         }
1652
1653         /* Let makeConstraints know whether we have essential dynamics constraints. */
1654         auto constr = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics, fplog, cr,
1655                                       ms, &nrnb, wcycle, fr->bMolPBC);
1656
1657         /* Energy terms and groups */
1658         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
1659                              inputrec->fepvals->n_lambda);
1660
1661         /* Kinetic energy data */
1662         gmx_ekindata_t ekind;
1663         init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
1664
1665         /* Set up interactive MD (IMD) */
1666         auto imdSession =
1667                 makeImdSession(inputrec.get(), cr, wcycle, &enerd, ms, &mtop, mdlog,
1668                                MASTER(cr) ? globalState->x.rvec_array() : nullptr, filenames.size(),
1669                                filenames.data(), oenv, mdrunOptions.imdOptions, startingBehavior);
1670
1671         if (DOMAINDECOMP(cr))
1672         {
1673             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1674             /* This call is not included in init_domain_decomposition mainly
1675              * because fr->cginfo_mb is set later.
1676              */
1677             dd_init_bondeds(fplog, cr->dd, mtop, vsite.get(), inputrec.get(),
1678                             domdecOptions.checkBondedInteractions, fr->cginfo_mb);
1679         }
1680
1681         if (runScheduleWork.simulationWork.useGpuBufferOps)
1682         {
1683             fr->gpuForceReduction[gmx::AtomLocality::Local] = std::make_unique<gmx::GpuForceReduction>(
1684                     deviceStreamManager->context(),
1685                     deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal));
1686             fr->gpuForceReduction[gmx::AtomLocality::NonLocal] = std::make_unique<gmx::GpuForceReduction>(
1687                     deviceStreamManager->context(),
1688                     deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal));
1689         }
1690
1691         std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
1692         if (gpusWereDetected
1693             && ((runScheduleWork.simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME))
1694                 || runScheduleWork.simulationWork.useGpuBufferOps))
1695         {
1696             GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)
1697                                                       ? GpuApiCallBehavior::Async
1698                                                       : GpuApiCallBehavior::Sync;
1699             GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
1700                                "GPU device stream manager should be initialized to use GPU.");
1701             stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
1702                     *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle);
1703             fr->stateGpu = stateGpu.get();
1704         }
1705
1706         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1707         SimulatorBuilder simulatorBuilder;
1708
1709         simulatorBuilder.add(SimulatorStateData(globalState.get(), &observablesHistory, &enerd, &ekind));
1710         simulatorBuilder.add(std::move(membedHolder));
1711         simulatorBuilder.add(std::move(stopHandlerBuilder_));
1712         simulatorBuilder.add(SimulatorConfig(mdrunOptions, startingBehavior, &runScheduleWork));
1713
1714
1715         simulatorBuilder.add(SimulatorEnv(fplog, cr, ms, mdlog, oenv));
1716         simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle));
1717         simulatorBuilder.add(ConstraintsParam(
1718                 constr.get(), enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1719                 vsite.get()));
1720         // TODO: Separate `fr` to a separate add, and make the `build` handle the coupling sensibly.
1721         simulatorBuilder.add(LegacyInput(static_cast<int>(filenames.size()), filenames.data(),
1722                                          inputrec.get(), fr));
1723         simulatorBuilder.add(ReplicaExchangeParameters(replExParams));
1724         simulatorBuilder.add(InteractiveMD(imdSession.get()));
1725         simulatorBuilder.add(SimulatorModules(mdModules_->outputProvider(), mdModules_->notifier()));
1726         simulatorBuilder.add(CenterOfMassPulling(pull_work));
1727         // Todo move to an MDModule
1728         simulatorBuilder.add(IonSwapping(swap));
1729         simulatorBuilder.add(TopologyData(&mtop, mdAtoms.get()));
1730         simulatorBuilder.add(BoxDeformationHandle(deform.get()));
1731         simulatorBuilder.add(std::move(modularSimulatorCheckpointData));
1732
1733         // build and run simulator object based on user-input
1734         auto simulator = simulatorBuilder.build(useModularSimulator);
1735         simulator->run();
1736
1737         if (fr->pmePpCommGpu)
1738         {
1739             // destroy object since it is no longer required. (This needs to be done while the GPU context still exists.)
1740             fr->pmePpCommGpu.reset();
1741         }
1742
1743         if (inputrec->bPull)
1744         {
1745             finish_pull(pull_work);
1746         }
1747         finish_swapcoords(swap);
1748     }
1749     else
1750     {
1751         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1752         /* do PME only */
1753         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1754         gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec.get(), pmeRunMode,
1755                     deviceStreamManager.get());
1756     }
1757
1758     wallcycle_stop(wcycle, ewcRUN);
1759
1760     /* Finish up, write some stuff
1761      * if rerunMD, don't write last frame again
1762      */
1763     finish_run(fplog, mdlog, cr, inputrec.get(), &nrnb, wcycle, walltime_accounting,
1764                fr ? fr->nbv.get() : nullptr, pmedata, EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1765
1766     // clean up cycle counter
1767     wallcycle_destroy(wcycle);
1768
1769     deviceStreamManager.reset(nullptr);
1770     // Free PME data
1771     if (pmedata)
1772     {
1773         gmx_pme_destroy(pmedata);
1774         pmedata = nullptr;
1775     }
1776
1777     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1778     // before we destroy the GPU context(s)
1779     // Pinned buffers are associated with contexts in CUDA.
1780     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1781     mdAtoms.reset(nullptr);
1782     globalState.reset(nullptr);
1783     mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU
1784     gpuBonded.reset(nullptr);
1785     /* Free pinned buffers in *fr */
1786     delete fr;
1787     fr = nullptr;
1788     // TODO convert to C++ so we can get rid of these frees
1789     sfree(disresdata);
1790     sfree(oriresdata);
1791
1792     if (!hwinfo->deviceInfoList.empty())
1793     {
1794         /* stop the GPU profiler (only CUDA) */
1795         stopGpuProfiler();
1796     }
1797
1798     /* With tMPI we need to wait for all ranks to finish deallocation before
1799      * destroying the CUDA context as some tMPI ranks may be sharing
1800      * GPU and context.
1801      *
1802      * This is not a concern in OpenCL where we use one context per rank.
1803      *
1804      * Note: it is safe to not call the barrier on the ranks which do not use GPU,
1805      * but it is easier and more futureproof to call it on the whole node.
1806      *
1807      * Note that this function needs to be called even if GPUs are not used
1808      * in this run because the PME ranks have no knowledge of whether GPUs
1809      * are used or not, but all ranks need to enter the barrier below.
1810      * \todo Remove this physical node barrier after making sure
1811      * that it's not needed anymore (with a shared GPU run).
1812      */
1813     if (GMX_THREAD_MPI)
1814     {
1815         physicalNodeComm.barrier();
1816     }
1817     releaseDevice(deviceInfo);
1818
1819     /* Does what it says */
1820     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1821     walltime_accounting_destroy(walltime_accounting);
1822
1823     // Ensure log file content is written
1824     if (logFileHandle)
1825     {
1826         gmx_fio_flush(logFileHandle);
1827     }
1828
1829     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1830      * exceptions were enabled before function was called. */
1831     if (bEnableFPE)
1832     {
1833         gmx_fedisableexcept();
1834     }
1835
1836     auto rc = static_cast<int>(gmx_get_stop_condition());
1837
1838 #if GMX_THREAD_MPI
1839     /* we need to join all threads. The sub-threads join when they
1840        exit this function, but the master thread needs to be told to
1841        wait for that. */
1842     if (MASTER(cr))
1843     {
1844         tMPI_Finalize();
1845     }
1846 #endif
1847     return rc;
1848 } // namespace gmx
1849
1850 Mdrunner::~Mdrunner()
1851 {
1852     // Clean up of the Manager.
1853     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1854     // but okay as long as threads synchronize some time before adding or accessing
1855     // a new set of restraints.
1856     if (restraintManager_)
1857     {
1858         restraintManager_->clear();
1859         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1860                    "restraints added during runner life time should be cleared at runner "
1861                    "destruction.");
1862     }
1863 };
1864
1865 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller, const std::string& name)
1866 {
1867     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1868     // Not sure if this should be logged through the md logger or something else,
1869     // but it is helpful to have some sort of INFO level message sent somewhere.
1870     //    std::cout << "Registering restraint named " << name << std::endl;
1871
1872     // When multiple restraints are used, it may be wasteful to register them separately.
1873     // Maybe instead register an entire Restraint Manager as a force provider.
1874     restraintManager_->addToSpec(std::move(puller), name);
1875 }
1876
1877 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules) : mdModules_(std::move(mdModules)) {}
1878
1879 Mdrunner::Mdrunner(Mdrunner&&) noexcept = default;
1880
1881 Mdrunner& Mdrunner::operator=(Mdrunner&& /*handle*/) noexcept = default;
1882
1883 class Mdrunner::BuilderImplementation
1884 {
1885 public:
1886     BuilderImplementation() = delete;
1887     BuilderImplementation(std::unique_ptr<MDModules> mdModules, compat::not_null<SimulationContext*> context);
1888     ~BuilderImplementation();
1889
1890     BuilderImplementation& setExtraMdrunOptions(const MdrunOptions& options,
1891                                                 real                forceWarningThreshold,
1892                                                 StartingBehavior    startingBehavior);
1893
1894     void addDomdec(const DomdecOptions& options);
1895
1896     void addInput(SimulationInputHandle inputHolder);
1897
1898     void addVerletList(int nstlist);
1899
1900     void addReplicaExchange(const ReplicaExchangeParameters& params);
1901
1902     void addNonBonded(const char* nbpu_opt);
1903
1904     void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1905
1906     void addBondedTaskAssignment(const char* bonded_opt);
1907
1908     void addUpdateTaskAssignment(const char* update_opt);
1909
1910     void addHardwareOptions(const gmx_hw_opt_t& hardwareOptions);
1911
1912     void addFilenames(ArrayRef<const t_filenm> filenames);
1913
1914     void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1915
1916     void addLogFile(t_fileio* logFileHandle);
1917
1918     void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1919
1920     Mdrunner build();
1921
1922 private:
1923     // Default parameters copied from runner.h
1924     // \todo Clarify source(s) of default parameters.
1925
1926     const char* nbpu_opt_    = nullptr;
1927     const char* pme_opt_     = nullptr;
1928     const char* pme_fft_opt_ = nullptr;
1929     const char* bonded_opt_  = nullptr;
1930     const char* update_opt_  = nullptr;
1931
1932     MdrunOptions mdrunOptions_;
1933
1934     DomdecOptions domdecOptions_;
1935
1936     ReplicaExchangeParameters replicaExchangeParameters_;
1937
1938     //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1939     int nstlist_ = 0;
1940
1941     //! World communicator, used for hardware detection and task assignment
1942     MPI_Comm libraryWorldCommunicator_ = MPI_COMM_NULL;
1943
1944     //! Multisim communicator handle.
1945     gmx_multisim_t* multiSimulation_;
1946
1947     //! mdrun communicator
1948     MPI_Comm simulationCommunicator_ = MPI_COMM_NULL;
1949
1950     //! Print a warning if any force is larger than this (in kJ/mol nm).
1951     real forceWarningThreshold_ = -1;
1952
1953     //! Whether the simulation will start afresh, or restart with/without appending.
1954     StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1955
1956     //! The modules that comprise the functionality of mdrun.
1957     std::unique_ptr<MDModules> mdModules_;
1958
1959     //! \brief Parallelism information.
1960     gmx_hw_opt_t hardwareOptions_;
1961
1962     //! filename options for simulation.
1963     ArrayRef<const t_filenm> filenames_;
1964
1965     /*! \brief Handle to output environment.
1966      *
1967      * \todo gmx_output_env_t needs lifetime management.
1968      */
1969     gmx_output_env_t* outputEnvironment_ = nullptr;
1970
1971     /*! \brief Non-owning handle to MD log file.
1972      *
1973      * \todo Context should own output facilities for client.
1974      * \todo Improve log file handle management.
1975      * \internal
1976      * Code managing the FILE* relies on the ability to set it to
1977      * nullptr to check whether the filehandle is valid.
1978      */
1979     t_fileio* logFileHandle_ = nullptr;
1980
1981     /*!
1982      * \brief Builder for simulation stop signal handler.
1983      */
1984     std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1985
1986     /*!
1987      * \brief Sources for initial simulation state.
1988      *
1989      * See issue #3652 for near-term refinements to the SimulationInput interface.
1990      *
1991      * See issue #3379 for broader discussion on API aspects of simulation inputs and outputs.
1992      */
1993     SimulationInputHandle inputHolder_;
1994 };
1995
1996 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules,
1997                                                        compat::not_null<SimulationContext*> context) :
1998     mdModules_(std::move(mdModules))
1999 {
2000     libraryWorldCommunicator_ = context->libraryWorldCommunicator_;
2001     simulationCommunicator_   = context->simulationCommunicator_;
2002     multiSimulation_          = context->multiSimulation_.get();
2003 }
2004
2005 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
2006
2007 Mdrunner::BuilderImplementation&
2008 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions&    options,
2009                                                       const real             forceWarningThreshold,
2010                                                       const StartingBehavior startingBehavior)
2011 {
2012     mdrunOptions_          = options;
2013     forceWarningThreshold_ = forceWarningThreshold;
2014     startingBehavior_      = startingBehavior;
2015     return *this;
2016 }
2017
2018 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions& options)
2019 {
2020     domdecOptions_ = options;
2021 }
2022
2023 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
2024 {
2025     nstlist_ = nstlist;
2026 }
2027
2028 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters& params)
2029 {
2030     replicaExchangeParameters_ = params;
2031 }
2032
2033 Mdrunner Mdrunner::BuilderImplementation::build()
2034 {
2035     auto newRunner = Mdrunner(std::move(mdModules_));
2036
2037     newRunner.mdrunOptions     = mdrunOptions_;
2038     newRunner.pforce           = forceWarningThreshold_;
2039     newRunner.startingBehavior = startingBehavior_;
2040     newRunner.domdecOptions    = domdecOptions_;
2041
2042     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
2043     newRunner.hw_opt = hardwareOptions_;
2044
2045     // No invariant to check. This parameter exists to optionally override other behavior.
2046     newRunner.nstlist_cmdline = nstlist_;
2047
2048     newRunner.replExParams = replicaExchangeParameters_;
2049
2050     newRunner.filenames = filenames_;
2051
2052     newRunner.libraryWorldCommunicator = libraryWorldCommunicator_;
2053
2054     newRunner.simulationCommunicator = simulationCommunicator_;
2055
2056     // nullptr is a valid value for the multisim handle
2057     newRunner.ms = multiSimulation_;
2058
2059     if (inputHolder_)
2060     {
2061         newRunner.inputHolder_ = std::move(inputHolder_);
2062     }
2063     else
2064     {
2065         GMX_THROW(gmx::APIError("MdrunnerBuilder::addInput() is required before build()."));
2066     }
2067
2068     // \todo Clarify ownership and lifetime management for gmx_output_env_t
2069     // \todo Update sanity checking when output environment has clearly specified invariants.
2070     // Initialization and default values for oenv are not well specified in the current version.
2071     if (outputEnvironment_)
2072     {
2073         newRunner.oenv = outputEnvironment_;
2074     }
2075     else
2076     {
2077         GMX_THROW(gmx::APIError(
2078                 "MdrunnerBuilder::addOutputEnvironment() is required before build()"));
2079     }
2080
2081     newRunner.logFileHandle = logFileHandle_;
2082
2083     if (nbpu_opt_)
2084     {
2085         newRunner.nbpu_opt = nbpu_opt_;
2086     }
2087     else
2088     {
2089         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
2090     }
2091
2092     if (pme_opt_ && pme_fft_opt_)
2093     {
2094         newRunner.pme_opt     = pme_opt_;
2095         newRunner.pme_fft_opt = pme_fft_opt_;
2096     }
2097     else
2098     {
2099         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
2100     }
2101
2102     if (bonded_opt_)
2103     {
2104         newRunner.bonded_opt = bonded_opt_;
2105     }
2106     else
2107     {
2108         GMX_THROW(gmx::APIError(
2109                 "MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
2110     }
2111
2112     if (update_opt_)
2113     {
2114         newRunner.update_opt = update_opt_;
2115     }
2116     else
2117     {
2118         GMX_THROW(gmx::APIError(
2119                 "MdrunnerBuilder::addUpdateTaskAssignment() is required before build()  "));
2120     }
2121
2122
2123     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
2124
2125     if (stopHandlerBuilder_)
2126     {
2127         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
2128     }
2129     else
2130     {
2131         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
2132     }
2133
2134     return newRunner;
2135 }
2136
2137 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
2138 {
2139     nbpu_opt_ = nbpu_opt;
2140 }
2141
2142 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, const char* pme_fft_opt)
2143 {
2144     pme_opt_     = pme_opt;
2145     pme_fft_opt_ = pme_fft_opt;
2146 }
2147
2148 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
2149 {
2150     bonded_opt_ = bonded_opt;
2151 }
2152
2153 void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
2154 {
2155     update_opt_ = update_opt;
2156 }
2157
2158 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
2159 {
2160     hardwareOptions_ = hardwareOptions;
2161 }
2162
2163 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
2164 {
2165     filenames_ = filenames;
2166 }
2167
2168 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2169 {
2170     outputEnvironment_ = outputEnvironment;
2171 }
2172
2173 void Mdrunner::BuilderImplementation::addLogFile(t_fileio* logFileHandle)
2174 {
2175     logFileHandle_ = logFileHandle;
2176 }
2177
2178 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2179 {
2180     stopHandlerBuilder_ = std::move(builder);
2181 }
2182
2183 void Mdrunner::BuilderImplementation::addInput(SimulationInputHandle inputHolder)
2184 {
2185     inputHolder_ = std::move(inputHolder);
2186 }
2187
2188 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
2189                                  compat::not_null<SimulationContext*> context) :
2190     impl_{ std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context) }
2191 {
2192 }
2193
2194 MdrunnerBuilder::~MdrunnerBuilder() = default;
2195
2196 MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions&    options,
2197                                                       real                   forceWarningThreshold,
2198                                                       const StartingBehavior startingBehavior)
2199 {
2200     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
2201     return *this;
2202 }
2203
2204 MdrunnerBuilder& MdrunnerBuilder::addDomainDecomposition(const DomdecOptions& options)
2205 {
2206     impl_->addDomdec(options);
2207     return *this;
2208 }
2209
2210 MdrunnerBuilder& MdrunnerBuilder::addNeighborList(int nstlist)
2211 {
2212     impl_->addVerletList(nstlist);
2213     return *this;
2214 }
2215
2216 MdrunnerBuilder& MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters& params)
2217 {
2218     impl_->addReplicaExchange(params);
2219     return *this;
2220 }
2221
2222 MdrunnerBuilder& MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
2223 {
2224     impl_->addNonBonded(nbpu_opt);
2225     return *this;
2226 }
2227
2228 MdrunnerBuilder& MdrunnerBuilder::addElectrostatics(const char* pme_opt, const char* pme_fft_opt)
2229 {
2230     // The builder method may become more general in the future, but in this version,
2231     // parameters for PME electrostatics are both required and the only parameters
2232     // available.
2233     if (pme_opt && pme_fft_opt)
2234     {
2235         impl_->addPME(pme_opt, pme_fft_opt);
2236     }
2237     else
2238     {
2239         GMX_THROW(
2240                 gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
2241     }
2242     return *this;
2243 }
2244
2245 MdrunnerBuilder& MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
2246 {
2247     impl_->addBondedTaskAssignment(bonded_opt);
2248     return *this;
2249 }
2250
2251 MdrunnerBuilder& MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
2252 {
2253     impl_->addUpdateTaskAssignment(update_opt);
2254     return *this;
2255 }
2256
2257 Mdrunner MdrunnerBuilder::build()
2258 {
2259     return impl_->build();
2260 }
2261
2262 MdrunnerBuilder& MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
2263 {
2264     impl_->addHardwareOptions(hardwareOptions);
2265     return *this;
2266 }
2267
2268 MdrunnerBuilder& MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2269 {
2270     impl_->addFilenames(filenames);
2271     return *this;
2272 }
2273
2274 MdrunnerBuilder& MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2275 {
2276     impl_->addOutputEnvironment(outputEnvironment);
2277     return *this;
2278 }
2279
2280 MdrunnerBuilder& MdrunnerBuilder::addLogFile(t_fileio* logFileHandle)
2281 {
2282     impl_->addLogFile(logFileHandle);
2283     return *this;
2284 }
2285
2286 MdrunnerBuilder& MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2287 {
2288     impl_->addStopHandlerBuilder(std::move(builder));
2289     return *this;
2290 }
2291
2292 MdrunnerBuilder& MdrunnerBuilder::addInput(SimulationInputHandle input)
2293 {
2294     impl_->addInput(std::move(input));
2295     return *this;
2296 }
2297
2298 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder&&) noexcept = default;
2299
2300 MdrunnerBuilder& MdrunnerBuilder::operator=(MdrunnerBuilder&&) noexcept = default;
2301
2302 } // namespace gmx