Partial renaming of cginfo to atom info
[alexxy/gromacs.git] / src / gromacs / mdrun / runner.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the MD runner routine calling all integrators.
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include "runner.h"
47
48 #include "config.h"
49
50 #include <cassert>
51 #include <cinttypes>
52 #include <csignal>
53 #include <cstdlib>
54 #include <cstring>
55
56 #include <algorithm>
57 #include <memory>
58
59 #include "gromacs/commandline/filenm.h"
60 #include "gromacs/domdec/builder.h"
61 #include "gromacs/domdec/domdec.h"
62 #include "gromacs/domdec/domdec_struct.h"
63 #include "gromacs/domdec/gpuhaloexchange.h"
64 #include "gromacs/domdec/localatomsetmanager.h"
65 #include "gromacs/domdec/partition.h"
66 #include "gromacs/domdec/reversetopology.h"
67 #include "gromacs/ewald/ewald_utils.h"
68 #include "gromacs/ewald/pme.h"
69 #include "gromacs/ewald/pme_gpu_program.h"
70 #include "gromacs/ewald/pme_only.h"
71 #include "gromacs/ewald/pme_pp_comm_gpu.h"
72 #include "gromacs/fileio/checkpoint.h"
73 #include "gromacs/fileio/gmxfio.h"
74 #include "gromacs/fileio/oenv.h"
75 #include "gromacs/fileio/tpxio.h"
76 #include "gromacs/gmxlib/network.h"
77 #include "gromacs/gmxlib/nrnb.h"
78 #include "gromacs/gpu_utils/device_stream_manager.h"
79 #include "gromacs/hardware/cpuinfo.h"
80 #include "gromacs/hardware/detecthardware.h"
81 #include "gromacs/hardware/device_management.h"
82 #include "gromacs/hardware/hardwaretopology.h"
83 #include "gromacs/hardware/printhardware.h"
84 #include "gromacs/imd/imd.h"
85 #include "gromacs/listed_forces/disre.h"
86 #include "gromacs/listed_forces/listed_forces_gpu.h"
87 #include "gromacs/listed_forces/listed_forces.h"
88 #include "gromacs/listed_forces/orires.h"
89 #include "gromacs/math/functions.h"
90 #include "gromacs/math/utilities.h"
91 #include "gromacs/math/vec.h"
92 #include "gromacs/mdlib/boxdeformation.h"
93 #include "gromacs/mdlib/broadcaststructs.h"
94 #include "gromacs/mdlib/calc_verletbuf.h"
95 #include "gromacs/mdlib/dispersioncorrection.h"
96 #include "gromacs/mdlib/enerdata_utils.h"
97 #include "gromacs/mdlib/force.h"
98 #include "gromacs/mdlib/forcerec.h"
99 #include "gromacs/mdlib/gmx_omp_nthreads.h"
100 #include "gromacs/mdlib/gpuforcereduction.h"
101 #include "gromacs/mdlib/makeconstraints.h"
102 #include "gromacs/mdlib/md_support.h"
103 #include "gromacs/mdlib/mdatoms.h"
104 #include "gromacs/mdlib/sighandler.h"
105 #include "gromacs/mdlib/stophandler.h"
106 #include "gromacs/mdlib/tgroup.h"
107 #include "gromacs/mdlib/updategroups.h"
108 #include "gromacs/mdlib/vsite.h"
109 #include "gromacs/mdrun/mdmodules.h"
110 #include "gromacs/mdrun/simulationcontext.h"
111 #include "gromacs/mdrun/simulationinput.h"
112 #include "gromacs/mdrun/simulationinputhandle.h"
113 #include "gromacs/mdrunutility/handlerestart.h"
114 #include "gromacs/mdrunutility/logging.h"
115 #include "gromacs/mdrunutility/multisim.h"
116 #include "gromacs/mdrunutility/printtime.h"
117 #include "gromacs/mdrunutility/threadaffinity.h"
118 #include "gromacs/mdtypes/checkpointdata.h"
119 #include "gromacs/mdtypes/commrec.h"
120 #include "gromacs/mdtypes/enerdata.h"
121 #include "gromacs/mdtypes/fcdata.h"
122 #include "gromacs/mdtypes/forcerec.h"
123 #include "gromacs/mdtypes/group.h"
124 #include "gromacs/mdtypes/inputrec.h"
125 #include "gromacs/mdtypes/interaction_const.h"
126 #include "gromacs/mdtypes/md_enums.h"
127 #include "gromacs/mdtypes/mdatom.h"
128 #include "gromacs/mdtypes/mdrunoptions.h"
129 #include "gromacs/mdtypes/observableshistory.h"
130 #include "gromacs/mdtypes/simulation_workload.h"
131 #include "gromacs/mdtypes/state.h"
132 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
133 #include "gromacs/modularsimulator/modularsimulator.h"
134 #include "gromacs/nbnxm/gpu_data_mgmt.h"
135 #include "gromacs/nbnxm/nbnxm.h"
136 #include "gromacs/nbnxm/pairlist_tuning.h"
137 #include "gromacs/pbcutil/pbc.h"
138 #include "gromacs/pulling/output.h"
139 #include "gromacs/pulling/pull.h"
140 #include "gromacs/pulling/pull_rotation.h"
141 #include "gromacs/restraint/manager.h"
142 #include "gromacs/restraint/restraintmdmodule.h"
143 #include "gromacs/restraint/restraintpotential.h"
144 #include "gromacs/swap/swapcoords.h"
145 #include "gromacs/taskassignment/decidegpuusage.h"
146 #include "gromacs/taskassignment/decidesimulationworkload.h"
147 #include "gromacs/taskassignment/resourcedivision.h"
148 #include "gromacs/taskassignment/taskassignment.h"
149 #include "gromacs/taskassignment/usergpuids.h"
150 #include "gromacs/timing/gpu_timing.h"
151 #include "gromacs/timing/wallcycle.h"
152 #include "gromacs/timing/wallcyclereporting.h"
153 #include "gromacs/topology/mtop_util.h"
154 #include "gromacs/trajectory/trajectoryframe.h"
155 #include "gromacs/utility/basenetwork.h"
156 #include "gromacs/utility/cstringutil.h"
157 #include "gromacs/utility/exceptions.h"
158 #include "gromacs/utility/fatalerror.h"
159 #include "gromacs/utility/filestream.h"
160 #include "gromacs/utility/gmxassert.h"
161 #include "gromacs/utility/gmxmpi.h"
162 #include "gromacs/utility/keyvaluetree.h"
163 #include "gromacs/utility/logger.h"
164 #include "gromacs/utility/loggerbuilder.h"
165 #include "gromacs/utility/mdmodulesnotifiers.h"
166 #include "gromacs/utility/physicalnodecommunicator.h"
167 #include "gromacs/utility/pleasecite.h"
168 #include "gromacs/utility/programcontext.h"
169 #include "gromacs/utility/smalloc.h"
170 #include "gromacs/utility/stringutil.h"
171 #include "gromacs/utility/mpiinfo.h"
172
173 #include "isimulator.h"
174 #include "membedholder.h"
175 #include "replicaexchange.h"
176 #include "simulatorbuilder.h"
177
178 namespace gmx
179 {
180
181
182 /*! \brief Manage any development feature flag variables encountered
183  *
184  * The use of dev features indicated by environment variables is
185  * logged in order to ensure that runs with such features enabled can
186  * be identified from their log and standard output. Any cross
187  * dependencies are also checked, and if unsatisfied, a fatal error
188  * is issued.
189  *
190  * Note that some development feature overrides are already applied here:
191  * the GPU communication flags are set to false in non-tMPI and non-CUDA builds.
192  *
193  * \param[in]  mdlog                Logger object.
194  * \param[in]  useGpuForNonbonded   True if the nonbonded task is offloaded in this run.
195  * \param[in]  pmeRunMode           The PME run mode for this run
196  * \returns                         The object populated with development feature flags.
197  */
198 static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& mdlog,
199                                                          const bool           useGpuForNonbonded,
200                                                          const PmeRunMode     pmeRunMode)
201 {
202     DevelopmentFeatureFlags devFlags;
203
204     // Some builds of GCC 5 give false positive warnings that these
205     // getenv results are ignored when clearly they are used.
206 #pragma GCC diagnostic push
207 #pragma GCC diagnostic ignored "-Wunused-result"
208
209     devFlags.enableGpuBufferOps =
210             GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
211     devFlags.enableGpuHaloExchange = GMX_GPU_CUDA && getenv("GMX_GPU_DD_COMMS") != nullptr;
212     devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr) || GMX_FAHCORE;
213     devFlags.enableGpuPmePPComm = GMX_GPU_CUDA && getenv("GMX_GPU_PME_PP_COMMS") != nullptr;
214
215 #pragma GCC diagnostic pop
216
217     // The direct GPU communication path is used with CUDA-aware MPI;
218     // make sure the underlying MPI implementation is CUDA-aware.
219     if (!GMX_THREAD_MPI && (devFlags.enableGpuPmePPComm || devFlags.enableGpuHaloExchange))
220     {
221         const bool haveDetectedCudaAwareMpi =
222                 (checkMpiCudaAwareSupport() == CudaAwareMpiStatus::Supported);
223         const bool forceCudaAwareMpi = (getenv("GMX_FORCE_CUDA_AWARE_MPI") != nullptr);
224
225         if (!haveDetectedCudaAwareMpi && forceCudaAwareMpi)
226         {
227             // CUDA-aware support was not detected in the MPI library, but the user has forced its use
228             GMX_LOG(mdlog.warning)
229                     .asParagraph()
230                     .appendTextFormatted(
231                             "This run has forced use of 'CUDA-aware MPI'. "
232                             "However, GROMACS cannot determine whether the underlying MPI "
233                             "is CUDA-aware. GROMACS recommends using the latest OpenMPI version "
234                             "for CUDA-aware support. "
235                             "If you observe failures at runtime, try unsetting the "
236                             "GMX_FORCE_CUDA_AWARE_MPI environment variable.");
237         }
238
239         if (haveDetectedCudaAwareMpi || forceCudaAwareMpi)
240         {
241             devFlags.usingCudaAwareMpi = true;
242             GMX_LOG(mdlog.warning)
243                     .asParagraph()
244                     .appendTextFormatted(
245                             "Using CUDA-aware MPI for 'GPU halo exchange' or 'GPU PME-PP "
246                             "communications' feature.");
247         }
248         else
249         {
250             if (devFlags.enableGpuHaloExchange)
251             {
252                 GMX_LOG(mdlog.warning)
253                         .asParagraph()
254                         .appendTextFormatted(
255                                 "GMX_GPU_DD_COMMS environment variable detected, but the 'GPU "
256                                 "halo exchange' feature will not be enabled as GROMACS couldn't "
257                                 "detect CUDA-aware support in the underlying MPI implementation.");
258                 devFlags.enableGpuHaloExchange = false;
259             }
260             if (devFlags.enableGpuPmePPComm)
261             {
262                 GMX_LOG(mdlog.warning)
263                         .asParagraph()
264                         .appendText(
265                                 "GMX_GPU_PME_PP_COMMS environment variable detected, but the "
266                                 "'GPU PME-PP communications' feature will not be enabled as "
267                                 "GROMACS couldn't "
268                                 "detect CUDA-aware support in the underlying MPI implementation.");
269                 devFlags.enableGpuPmePPComm = false;
270             }
271
272             GMX_LOG(mdlog.warning)
273                     .asParagraph()
274                     .appendTextFormatted(
275                             "GROMACS recommends using the latest OpenMPI version for CUDA-aware "
276                             "support. "
277                             "If you are certain about CUDA-aware support in your MPI library, "
278                             "you can force its use by setting the environment variable "
279                             "GMX_FORCE_CUDA_AWARE_MPI.");
280         }
281     }
282
283     if (devFlags.enableGpuBufferOps)
284     {
285         GMX_LOG(mdlog.warning)
286                 .asParagraph()
287                 .appendTextFormatted(
288                         "This run uses the 'GPU buffer ops' feature, enabled by the "
289                         "GMX_USE_GPU_BUFFER_OPS environment variable.");
290     }
291
292     if (devFlags.forceGpuUpdateDefault)
293     {
294         GMX_LOG(mdlog.warning)
295                 .asParagraph()
296                 .appendTextFormatted(
297                         "This run will default to '-update gpu' as requested by the "
298                         "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable. GPU update with domain "
299                         "decomposition lacks substantial testing and should be used with caution.");
300     }
301
302     if (devFlags.enableGpuHaloExchange)
303     {
304         if (useGpuForNonbonded)
305         {
306             if (!devFlags.enableGpuBufferOps)
307             {
308                 GMX_LOG(mdlog.warning)
309                         .asParagraph()
310                         .appendTextFormatted(
311                                 "Enabling GPU buffer operations required by GMX_GPU_DD_COMMS "
312                                 "(equivalent to GMX_USE_GPU_BUFFER_OPS=1).");
313                 devFlags.enableGpuBufferOps = true;
314             }
315             GMX_LOG(mdlog.warning)
316                     .asParagraph()
317                     .appendTextFormatted(
318                             "This run has requested the 'GPU halo exchange' feature, "
319                             "enabled by the "
320                             "GMX_GPU_DD_COMMS environment variable.");
321         }
322         else
323         {
324             GMX_LOG(mdlog.warning)
325                     .asParagraph()
326                     .appendTextFormatted(
327                             "GMX_GPU_DD_COMMS environment variable detected, but the 'GPU "
328                             "halo exchange' feature will not be enabled as nonbonded interactions "
329                             "are not offloaded.");
330             devFlags.enableGpuHaloExchange = false;
331         }
332     }
333
334     if (devFlags.enableGpuPmePPComm)
335     {
336         if (pmeRunMode == PmeRunMode::GPU)
337         {
338             if (!devFlags.enableGpuBufferOps)
339             {
340                 GMX_LOG(mdlog.warning)
341                         .asParagraph()
342                         .appendTextFormatted(
343                                 "Enabling GPU buffer operations required by GMX_GPU_PME_PP_COMMS "
344                                 "(equivalent to GMX_USE_GPU_BUFFER_OPS=1).");
345                 devFlags.enableGpuBufferOps = true;
346             }
347             GMX_LOG(mdlog.warning)
348                     .asParagraph()
349                     .appendTextFormatted(
350                             "This run uses the 'GPU PME-PP communications' feature, enabled "
351                             "by the GMX_GPU_PME_PP_COMMS environment variable.");
352         }
353         else
354         {
355             std::string clarification;
356             if (pmeRunMode == PmeRunMode::Mixed)
357             {
358                 clarification =
359                         "PME FFT and gather are not offloaded to the GPU (PME is running in mixed "
360                         "mode).";
361             }
362             else
363             {
364                 clarification = "PME is not offloaded to the GPU.";
365             }
366             GMX_LOG(mdlog.warning)
367                     .asParagraph()
368                     .appendText(
369                             "GMX_GPU_PME_PP_COMMS environment variable detected, but the "
370                             "'GPU PME-PP communications' feature was not enabled as "
371                             + clarification);
372             devFlags.enableGpuPmePPComm = false;
373         }
374     }
375
376     return devFlags;
377 }
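
// Illustrative shell usage of the development-feature variables handled above
// (a sketch only, assuming a CUDA build; the checks in
// manageDevelopmentFeatures() decide which requests actually take effect and
// report the outcome in the log):
//
//   GMX_USE_GPU_BUFFER_OPS=1 GMX_GPU_DD_COMMS=1 GMX_GPU_PME_PP_COMMS=1 gmx mdrun -nb gpu -pme gpu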
378
379 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
380  *
381  * Used to ensure that the master thread does not modify mdrunner while it
382  * is being copied on the spawned threads. */
383 static void threadMpiMdrunnerAccessBarrier()
384 {
385 #if GMX_THREAD_MPI
386     MPI_Barrier(MPI_COMM_WORLD);
387 #endif
388 }
389
390 Mdrunner Mdrunner::cloneOnSpawnedThread() const
391 {
392     auto newRunner = Mdrunner(std::make_unique<MDModules>());
393
394     // All runners in the same process share a restraint manager resource because it is
395     // part of the interface to the client code, which is associated only with the
396     // original thread. Handles to the same resources can be obtained by copy.
397     {
398         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
399     }
400
401     // Copy members of master runner.
402     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
403     // Ref https://gitlab.com/gromacs/gromacs/-/issues/2587 and https://gitlab.com/gromacs/gromacs/-/issues/2375
404     newRunner.hw_opt    = hw_opt;
405     newRunner.filenames = filenames;
406
407     newRunner.hwinfo_         = hwinfo_;
408     newRunner.oenv            = oenv;
409     newRunner.mdrunOptions    = mdrunOptions;
410     newRunner.domdecOptions   = domdecOptions;
411     newRunner.nbpu_opt        = nbpu_opt;
412     newRunner.pme_opt         = pme_opt;
413     newRunner.pme_fft_opt     = pme_fft_opt;
414     newRunner.bonded_opt      = bonded_opt;
415     newRunner.update_opt      = update_opt;
416     newRunner.nstlist_cmdline = nstlist_cmdline;
417     newRunner.replExParams    = replExParams;
418     newRunner.pforce          = pforce;
419     // Give the spawned thread the newly created valid communicator
420     // for the simulation.
421     newRunner.libraryWorldCommunicator = MPI_COMM_WORLD;
422     newRunner.simulationCommunicator   = MPI_COMM_WORLD;
423     newRunner.ms                       = ms;
424     newRunner.startingBehavior         = startingBehavior;
425     newRunner.stopHandlerBuilder_      = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
426     newRunner.inputHolder_             = inputHolder_;
427
428     threadMpiMdrunnerAccessBarrier();
429
430     return newRunner;
431 }
432
433 /*! \brief The callback used for running on spawned threads.
434  *
435  * Obtains the pointer to the master mdrunner object from the one
436  * argument permitted to the thread-launch API call, copies it to make
437  * a new runner for this thread, reinitializes necessary data, and
438  * proceeds to the simulation. */
439 static void mdrunner_start_fn(const void* arg)
440 {
441     try
442     {
443         const auto* masterMdrunner = reinterpret_cast<const gmx::Mdrunner*>(arg);
444         /* copy the arg list to make sure that it's thread-local. This
445            doesn't copy pointed-to items, of course; fnm, cr and fplog
446            are reset in the call below, all others should be const. */
447         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
448         mdrunner.mdrunner();
449     }
450     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
451 }
452
453
454 void Mdrunner::spawnThreads(int numThreadsToLaunch)
455 {
456 #if GMX_THREAD_MPI
457     /* now spawn new threads that start mdrunner_start_fn(), while
458        the main thread returns. Thread affinity is handled later. */
459     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, mdrunner_start_fn, static_cast<const void*>(this))
460         != TMPI_SUCCESS)
461     {
462         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
463     }
464
465     // Give the master thread the newly created valid communicator for
466     // the simulation.
467     libraryWorldCommunicator = MPI_COMM_WORLD;
468     simulationCommunicator   = MPI_COMM_WORLD;
469     threadMpiMdrunnerAccessBarrier();
470 #else
471     GMX_UNUSED_VALUE(numThreadsToLaunch);
472     GMX_UNUSED_VALUE(mdrunner_start_fn);
473 #endif
474 }
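
// Reading aid for the thread-MPI startup implemented above (a sketch, not a
// normative description):
//   1. The master rank calls spawnThreads(), which launches the thread-MPI
//      threads via tMPI_Init_fn() with mdrunner_start_fn() as the entry point.
//   2. Each spawned thread copies the master runner with
//      cloneOnSpawnedThread() and then runs mdrunner() itself.
//   3. threadMpiMdrunnerAccessBarrier() holds the master back from modifying
//      its members until those copies have been made.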
475
476 } // namespace gmx
477
478 /*! \brief Initialize variables for Verlet scheme simulation */
479 static void prepare_verlet_scheme(FILE*               fplog,
480                                   t_commrec*          cr,
481                                   t_inputrec*         ir,
482                                   int                 nstlist_cmdline,
483                                   const gmx_mtop_t&   mtop,
484                                   const matrix        box,
485                                   bool                makeGpuPairList,
486                                   const gmx::CpuInfo& cpuinfo)
487 {
488     // We checked the cut-offs in grompp, but double-check here.
489     // We have PME+LJcutoff kernels for rcoulomb>rvdw.
490     if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == VanDerWaalsType::Cut)
491     {
492         GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw,
493                            "With Verlet lists and PME we should have rcoulomb>=rvdw");
494     }
495     else
496     {
497         GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw,
498                            "With Verlet lists and no PME rcoulomb and rvdw should be identical");
499     }
500     /* For NVE simulations, we will retain the initial list buffer */
501     if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0
502         && !(EI_MD(ir->eI) && ir->etc == TemperatureCoupling::No))
503     {
504         /* Update the Verlet buffer size for the current run setup */
505
506         /* Here we assume SIMD-enabled kernels are being used. But as currently
507          * calcVerletBufferSize gives the same results for 4x8 and 4x4,
508          * and 4x2 gives a larger buffer than 4x4, this is ok.
509          */
510         ListSetupType listType =
511                 (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
512         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
513
514         const real rlist_new =
515                 calcVerletBufferSize(mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
516
517         if (rlist_new != ir->rlist)
518         {
519             if (fplog != nullptr)
520             {
521                 fprintf(fplog,
522                         "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
523                         ir->rlist,
524                         rlist_new,
525                         listSetup.cluster_size_i,
526                         listSetup.cluster_size_j);
527             }
528             ir->rlist = rlist_new;
529         }
530     }
531
532     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
533     {
534         gmx_fatal(FARGS,
535                   "Cannot set nstlist without %s",
536                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
537     }
538
539     if (EI_DYNAMICS(ir->eI))
540     {
541         /* Set or try nstlist values */
542         increaseNstlist(fplog, cr, ir, nstlist_cmdline, &mtop, box, makeGpuPairList, cpuinfo);
543     }
544 }
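
// For orientation: with the Verlet scheme the pair-list cut-off rlist is the
// largest interaction cut-off plus a buffer, so rlist >= max(rcoulomb, rvdw).
// As a purely illustrative example, rcoulomb = rvdw = 1.0 nm and a computed
// buffer of 0.1 nm would give rlist = 1.1 nm; the actual buffer follows from
// verlet-buffer-tolerance via calcVerletBufferSize() above.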
545
546 /*! \brief Override the nsteps value in inputrec
547  *
548  * with the value passed on the command line (if any)
549  */
550 static void override_nsteps_cmdline(const gmx::MDLogger& mdlog, int64_t nsteps_cmdline, t_inputrec* ir)
551 {
552     assert(ir);
553
554     /* override with anything other than the default -2 */
555     if (nsteps_cmdline > -2)
556     {
557         char sbuf_steps[STEPSTRSIZE];
558         char sbuf_msg[STRLEN];
559
560         ir->nsteps = nsteps_cmdline;
561         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
562         {
563             sprintf(sbuf_msg,
564                     "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
565                     gmx_step_str(nsteps_cmdline, sbuf_steps),
566                     fabs(nsteps_cmdline * ir->delta_t));
567         }
568         else
569         {
570             sprintf(sbuf_msg,
571                     "Overriding nsteps with value passed on the command line: %s steps",
572                     gmx_step_str(nsteps_cmdline, sbuf_steps));
573         }
574
575         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
576     }
577     else if (nsteps_cmdline < -2)
578     {
579         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64, nsteps_cmdline);
580     }
581     /* Do nothing if nsteps_cmdline == -2 */
582 }
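
// Illustrative effect of the override above (example values only):
//   gmx mdrun -nsteps 5000  ->  ir->nsteps = 5000, logged together with the run length in ps
//   gmx mdrun -nsteps -1    ->  ir->nsteps = -1, i.e. run without a fixed step limit
//   gmx mdrun               ->  nsteps_cmdline keeps its default of -2, nothing is overridden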
583
584 namespace gmx
585 {
586
587 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
588  *
589  * If not, and if a warning may be issued, logs a warning about
590  * falling back to CPU code. With thread-MPI, only the first
591  * call to this function should have \c issueWarning true. */
592 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger& mdlog, const t_inputrec& ir, bool issueWarning)
593 {
594     bool        gpuIsUseful = true;
595     std::string warning;
596
597     if (ir.opts.ngener - ir.nwall > 1)
598     {
599         /* The GPU code does not support more than one energy group.
600          * If the user requested GPUs explicitly, a fatal error is given later.
601          */
602         gpuIsUseful = false;
603         warning =
604                 "Multiple energy groups are not implemented for GPUs, falling back to the CPU. "
605                 "For better performance, run on the GPU without energy groups and then use "
606                 "gmx mdrun -rerun on the trajectory with an energy-group .tpr file.";
607     }
608
609     if (EI_TPI(ir.eI))
610     {
611         gpuIsUseful = false;
612         warning     = "TPI is not implemented for GPUs.";
613     }
614
615     if (!gpuIsUseful && issueWarning)
616     {
617         GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
618     }
619
620     return gpuIsUseful;
621 }
622
623 //! Initializes the logger for mdrun.
624 static gmx::LoggerOwner buildLogger(FILE* fplog, const bool isSimulationMasterRank)
625 {
626     gmx::LoggerBuilder builder;
627     if (fplog != nullptr)
628     {
629         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
630     }
631     if (isSimulationMasterRank)
632     {
633         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
634     }
635     return builder.build();
636 }
637
638 //! Make a TaskTarget from an mdrun argument string.
639 static TaskTarget findTaskTarget(const char* optionString)
640 {
641     TaskTarget returnValue = TaskTarget::Auto;
642
643     if (strncmp(optionString, "auto", 3) == 0)
644     {
645         returnValue = TaskTarget::Auto;
646     }
647     else if (strncmp(optionString, "cpu", 3) == 0)
648     {
649         returnValue = TaskTarget::Cpu;
650     }
651     else if (strncmp(optionString, "gpu", 3) == 0)
652     {
653         returnValue = TaskTarget::Gpu;
654     }
655     else
656     {
657         GMX_ASSERT(false, "Option string should have been checked for sanity already");
658     }
659
660     return returnValue;
661 }
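
// Reading aid, matching the comparisons above: "auto" -> TaskTarget::Auto,
// "cpu" -> TaskTarget::Cpu, "gpu" -> TaskTarget::Gpu. The same strings are
// accepted for the -nb, -pme, -pmefft, -bonded and -update mdrun options,
// whose values are passed to this function further below.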
662
663 //! Finish run, aggregate data to print performance info.
664 static void finish_run(FILE*                     fplog,
665                        const gmx::MDLogger&      mdlog,
666                        const t_commrec*          cr,
667                        const t_inputrec&         inputrec,
668                        t_nrnb                    nrnb[],
669                        gmx_wallcycle*            wcycle,
670                        gmx_walltime_accounting_t walltime_accounting,
671                        nonbonded_verlet_t*       nbv,
672                        const gmx_pme_t*          pme,
673                        gmx_bool                  bWriteStat)
674 {
675     double delta_t = 0;
676     double nbfs = 0, mflop = 0;
677     double elapsed_time, elapsed_time_over_all_ranks, elapsed_time_over_all_threads,
678             elapsed_time_over_all_threads_over_all_ranks;
679     /* Control whether it is valid to print a report. Only the
680        simulation master may print, but it should not do so if the run
681        terminated e.g. before a scheduled reset step. This is
682        complicated by the fact that PME ranks are unaware of the
683        reason why they were sent a pmerecvqxFINISH. To avoid
684        communication deadlocks, we always do the communication for the
685        report, even if we've decided not to write the report, because
686        how long it takes to finish the run is not important when we've
687        decided not to report on the simulation performance.
688
689        Further, we only report performance for dynamical integrators,
690        because those are the only ones for which we plan to
691        consider doing any optimizations. */
692     bool printReport = EI_DYNAMICS(inputrec.eI) && SIMMASTER(cr);
693
694     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
695     {
696         GMX_LOG(mdlog.warning)
697                 .asParagraph()
698                 .appendText("Simulation ended prematurely, no performance report will be written.");
699         printReport = false;
700     }
701
702     t_nrnb*                 nrnb_tot;
703     std::unique_ptr<t_nrnb> nrnbTotalStorage;
704     if (cr->nnodes > 1)
705     {
706         nrnbTotalStorage = std::make_unique<t_nrnb>();
707         nrnb_tot         = nrnbTotalStorage.get();
708 #if GMX_MPI
709         MPI_Allreduce(nrnb->n.data(), nrnb_tot->n.data(), eNRNB, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
710 #endif
711     }
712     else
713     {
714         nrnb_tot = nrnb;
715     }
716
717     elapsed_time = walltime_accounting_get_time_since_reset(walltime_accounting);
718     elapsed_time_over_all_threads =
719             walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
720     if (cr->nnodes > 1)
721     {
722 #if GMX_MPI
723         /* reduce elapsed_time over all MPI ranks in the current simulation */
724         MPI_Allreduce(&elapsed_time, &elapsed_time_over_all_ranks, 1, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
725         elapsed_time_over_all_ranks /= cr->nnodes;
726         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
727          * current simulation. */
728         MPI_Allreduce(&elapsed_time_over_all_threads,
729                       &elapsed_time_over_all_threads_over_all_ranks,
730                       1,
731                       MPI_DOUBLE,
732                       MPI_SUM,
733                       cr->mpi_comm_mysim);
734 #endif
735     }
736     else
737     {
738         elapsed_time_over_all_ranks                  = elapsed_time;
739         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
740     }
741
742     if (printReport)
743     {
744         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
745     }
746
747     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
748     {
749         print_dd_statistics(cr, inputrec, fplog);
750     }
751
752     /* TODO Move the responsibility for any scaling by thread counts
753      * to the code that handled the thread region, so that there's a
754      * mechanism to keep cycle counting working during the transition
755      * to task parallelism. */
756     int nthreads_pp  = gmx_omp_nthreads_get(ModuleMultiThread::Nonbonded);
757     int nthreads_pme = gmx_omp_nthreads_get(ModuleMultiThread::Pme);
758     wallcycle_scale_by_num_threads(
759             wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
760     auto cycle_sum(wallcycle_sum(cr, wcycle));
761
762     if (printReport)
763     {
764         auto* nbnxn_gpu_timings =
765                 (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
766         gmx_wallclock_gpu_pme_t pme_gpu_timings = {};
767
768         if (pme_gpu_task_enabled(pme))
769         {
770             pme_gpu_get_timings(pme, &pme_gpu_timings);
771         }
772         wallcycle_print(fplog,
773                         mdlog,
774                         cr->nnodes,
775                         cr->npmenodes,
776                         nthreads_pp,
777                         nthreads_pme,
778                         elapsed_time_over_all_ranks,
779                         wcycle,
780                         cycle_sum,
781                         nbnxn_gpu_timings,
782                         &pme_gpu_timings);
783
784         if (EI_DYNAMICS(inputrec.eI))
785         {
786             delta_t = inputrec.delta_t;
787         }
788
789         if (fplog)
790         {
791             print_perf(fplog,
792                        elapsed_time_over_all_threads_over_all_ranks,
793                        elapsed_time_over_all_ranks,
794                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
795                        delta_t,
796                        nbfs,
797                        mflop);
798         }
799         if (bWriteStat)
800         {
801             print_perf(stderr,
802                        elapsed_time_over_all_threads_over_all_ranks,
803                        elapsed_time_over_all_ranks,
804                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
805                        delta_t,
806                        nbfs,
807                        mflop);
808         }
809     }
810 }
811
812 int Mdrunner::mdrunner()
813 {
814     matrix                      box;
815     std::unique_ptr<t_forcerec> fr;
816     real                        ewaldcoeff_q     = 0;
817     real                        ewaldcoeff_lj    = 0;
818     int                         nChargePerturbed = -1, nTypePerturbed = 0;
819     gmx_walltime_accounting_t   walltime_accounting = nullptr;
820     MembedHolder                membedHolder(filenames.size(), filenames.data());
821
822     /* CAUTION: threads may be started later on in this function, so
823        cr doesn't reflect the final parallel state right now */
824     gmx_mtop_t mtop;
825
826     /* TODO: inputrec should tell us whether we use an algorithm, not a file option */
827     const bool doEssentialDynamics = opt2bSet("-ei", filenames.size(), filenames.data());
828     const bool doRerun             = mdrunOptions.rerun;
829
830     // Handle task-assignment related user options.
831     EmulateGpuNonbonded emulateGpuNonbonded =
832             (getenv("GMX_EMULATE_GPU") != nullptr ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
833
834     std::vector<int> userGpuTaskAssignment;
835     try
836     {
837         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
838     }
839     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
840     auto nonbondedTarget = findTaskTarget(nbpu_opt);
841     auto pmeTarget       = findTaskTarget(pme_opt);
842     auto pmeFftTarget    = findTaskTarget(pme_fft_opt);
843     auto bondedTarget    = findTaskTarget(bonded_opt);
844     auto updateTarget    = findTaskTarget(update_opt);
845
846     FILE* fplog = nullptr;
847     // If we are appending, we don't write log output because we need
848     // to check that the old log file matches what the checkpoint file
849     // expects. Otherwise, we should start to write log output now if
850     // there is a file ready for it.
851     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
852     {
853         fplog = gmx_fio_getfp(logFileHandle);
854     }
855     const bool isSimulationMasterRank = findIsSimulationMasterRank(ms, simulationCommunicator);
856     gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
857     gmx::MDLogger    mdlog(logOwner.logger());
858
859     gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo_);
860
861     std::vector<int> availableDevices =
862             makeListOfAvailableDevices(hwinfo_->deviceInfoList, hw_opt.devicesSelectedByUser);
863     const int numAvailableDevices = gmx::ssize(availableDevices);
864
865     // Print citation requests after all software/hardware printing
866     pleaseCiteGromacs(fplog);
867
868     // Note: legacy program logic relies on checking whether these pointers are assigned.
869     // Objects may or may not be allocated later.
870     std::unique_ptr<t_inputrec> inputrec;
871     std::unique_ptr<t_state>    globalState;
872
873     auto partialDeserializedTpr = std::make_unique<PartialDeserializedTprFile>();
874
875     if (isSimulationMasterRank)
876     {
877         // Allocate objects to be initialized by later function calls.
878         /* Only the master rank has the global state */
879         globalState = std::make_unique<t_state>();
880         inputrec    = std::make_unique<t_inputrec>();
881
882         /* Read (nearly) all data required for the simulation
883          * and keep the partly serialized tpr contents to send to other ranks later
884          */
885         applyGlobalSimulationState(
886                 *inputHolder_.get(), partialDeserializedTpr.get(), globalState.get(), inputrec.get(), &mtop);
887     }
888
889     /* Check and update the hardware options for internal consistency */
890     checkAndUpdateHardwareOptions(
891             mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks, inputrec.get());
892
893     if (GMX_THREAD_MPI && isSimulationMasterRank)
894     {
895         bool useGpuForNonbonded = false;
896         bool useGpuForPme       = false;
897         try
898         {
899             GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
900
901             // If the user specified the number of ranks, then we must
902             // respect that, but in default mode, we need to allow for
903             // the number of GPUs to choose the number of ranks.
904             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
905             useGpuForNonbonded         = decideWhetherToUseGpusForNonbondedWithThreadMpi(
906                     nonbondedTarget,
907                     numAvailableDevices > 0,
908                     userGpuTaskAssignment,
909                     emulateGpuNonbonded,
910                     canUseGpuForNonbonded,
911                     gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
912                     hw_opt.nthreads_tmpi);
913             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(useGpuForNonbonded,
914                                                                      pmeTarget,
915                                                                      numAvailableDevices,
916                                                                      userGpuTaskAssignment,
917                                                                      *hwinfo_,
918                                                                      *inputrec,
919                                                                      hw_opt.nthreads_tmpi,
920                                                                      domdecOptions.numPmeRanks);
921         }
922         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
923
924         /* Determine how many thread-MPI ranks to start.
925          *
926          * TODO Over-writing the user-supplied value here does
927          * prevent any possible subsequent checks from working
928          * correctly. */
929         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo_,
930                                                 &hw_opt,
931                                                 numAvailableDevices,
932                                                 useGpuForNonbonded,
933                                                 useGpuForPme,
934                                                 inputrec.get(),
935                                                 mtop,
936                                                 mdlog,
937                                                 membedHolder.doMembed());
938
939         // Now start the threads for thread MPI.
940         spawnThreads(hw_opt.nthreads_tmpi);
941         // The spawned threads enter mdrunner() and execution of
942         // master and spawned threads joins at the end of this block.
943     }
944
945     GMX_RELEASE_ASSERT(ms || simulationCommunicator != MPI_COMM_NULL,
946                        "Must have valid communicator unless running a multi-simulation");
947     CommrecHandle crHandle = init_commrec(simulationCommunicator);
948     t_commrec*    cr       = crHandle.get();
949     GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
950
951     PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
952
953     // If we detected the topology on this system, double-check that it makes sense
954     if (hwinfo_->hardwareTopology->isThisSystem())
955     {
956         hardwareTopologyDoubleCheckDetection(mdlog, *hwinfo_->hardwareTopology);
957     }
958
959     if (PAR(cr))
960     {
961         /* now broadcast everything to the non-master nodes/threads: */
962         if (!isSimulationMasterRank)
963         {
964             // Until now, only the master rank has a non-null pointer.
965             // On non-master ranks, allocate the object that will receive data in the following call.
966             inputrec = std::make_unique<t_inputrec>();
967         }
968         init_parallel(cr->mpiDefaultCommunicator,
969                       MASTER(cr),
970                       inputrec.get(),
971                       &mtop,
972                       partialDeserializedTpr.get());
973     }
974     GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
975     partialDeserializedTpr.reset(nullptr);
976
977     GMX_RELEASE_ASSERT(
978             !inputrec->useConstantAcceleration,
979             "Linear acceleration has been removed in GROMACS 2022, and was broken for many years "
980             "before that. Use GROMACS 4.5 or earlier if you need this feature.");
981
982     // Now the number of ranks is known to all ranks, and each knows
983     // the inputrec read by the master rank. The ranks can now all run
984     // the task-deciding functions and will agree on the result
985     // without needing to communicate.
986     const bool useDomainDecomposition =
987             (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == IntegrationAlgorithm::NM));
988
989     // Note that these variables describe only their own node.
990     //
991     // Note that when bonded interactions run on a GPU they always run
992     // alongside a nonbonded task, so do not influence task assignment
993     // even though they affect the force calculation workload.
994     bool useGpuForNonbonded = false;
995     bool useGpuForPme       = false;
996     bool useGpuForBonded    = false;
997     bool useGpuForUpdate    = false;
998     bool gpusWereDetected   = hwinfo_->ngpu_compatible_tot > 0;
999     try
1000     {
1001         // It's possible that there are different numbers of GPUs on
1002         // different nodes, which is the user's responsibility to
1003         // handle. If unsuitable, we will notice that during task
1004         // assignment.
1005         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
1006         useGpuForNonbonded         = decideWhetherToUseGpusForNonbonded(
1007                 nonbondedTarget,
1008                 userGpuTaskAssignment,
1009                 emulateGpuNonbonded,
1010                 canUseGpuForNonbonded,
1011                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI),
1012                 gpusWereDetected);
1013         useGpuForPme    = decideWhetherToUseGpusForPme(useGpuForNonbonded,
1014                                                     pmeTarget,
1015                                                     userGpuTaskAssignment,
1016                                                     *hwinfo_,
1017                                                     *inputrec,
1018                                                     cr->sizeOfDefaultCommunicator,
1019                                                     domdecOptions.numPmeRanks,
1020                                                     gpusWereDetected);
1021         useGpuForBonded = decideWhetherToUseGpusForBonded(
1022                 useGpuForNonbonded, useGpuForPme, bondedTarget, *inputrec, mtop, domdecOptions.numPmeRanks, gpusWereDetected);
1023     }
1024     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1025
1026     const PmeRunMode pmeRunMode = determinePmeRunMode(useGpuForPme, pmeFftTarget, *inputrec);
1027
1028     // Initialize development feature flags that are enabled by environment variables
1029     // and report those features that are enabled.
1030     const DevelopmentFeatureFlags devFlags =
1031             manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode);
1032
1033     const bool useModularSimulator = checkUseModularSimulator(false,
1034                                                               inputrec.get(),
1035                                                               doRerun,
1036                                                               mtop,
1037                                                               ms,
1038                                                               replExParams,
1039                                                               nullptr,
1040                                                               doEssentialDynamics,
1041                                                               membedHolder.doMembed());
1042
1043     // Build restraints.
1044     // TODO: hide restraint implementation details from Mdrunner.
1045     // There is nothing unique about restraints at this point as far as the
1046     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
1047     // factory functions from the SimulationContext on which to call mdModules_->add().
1048     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
1049     for (auto&& restraint : restraintManager_->getRestraints())
1050     {
1051         auto module = RestraintMDModule::create(restraint, restraint->sites());
1052         mdModules_->add(std::move(module));
1053     }
1054
1055     // TODO: Error handling
1056     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
1057     // now that the MDModules know their options, they know which callbacks to sign up to
1058     mdModules_->subscribeToSimulationSetupNotifications();
1059     const auto& setupNotifier = mdModules_->notifiers().simulationSetupNotifier_;
1060
1061     if (inputrec->internalParameters != nullptr)
1062     {
1063         setupNotifier.notify(*inputrec->internalParameters);
1064     }
1065
1066     if (fplog != nullptr)
1067     {
1068         pr_inputrec(fplog, 0, "Input Parameters", inputrec.get(), FALSE);
1069         fprintf(fplog, "\n");
1070     }
1071
1072     if (SIMMASTER(cr))
1073     {
1074         /* In rerun, set velocities to zero if present */
1075         if (doRerun && ((globalState->flags & enumValueToBitMask(StateEntry::V)) != 0))
1076         {
1077             // rerun does not use velocities
1078             GMX_LOG(mdlog.info)
1079                     .asParagraph()
1080                     .appendText(
1081                             "Rerun trajectory contains velocities. Rerun only evaluates "
1082                             "potential energy and forces. The velocities will be ignored.");
1083             for (int i = 0; i < globalState->natoms; i++)
1084             {
1085                 clear_rvec(globalState->v[i]);
1086             }
1087             globalState->flags &= ~enumValueToBitMask(StateEntry::V);
1088         }
1089
1090         /* now make sure the state is initialized and propagated */
1091         set_state_entries(globalState.get(), inputrec.get(), useModularSimulator);
1092     }
1093
1094     /* NM and TPI parallelize over force/energy calculations, not atoms,
1095      * so we need to initialize and broadcast the global state.
1096      */
1097     if (inputrec->eI == IntegrationAlgorithm::NM || inputrec->eI == IntegrationAlgorithm::TPI)
1098     {
1099         if (!MASTER(cr))
1100         {
1101             globalState = std::make_unique<t_state>();
1102         }
1103         broadcastStateWithoutDynamics(
1104                 cr->mpiDefaultCommunicator, DOMAINDECOMP(cr), PAR(cr), globalState.get());
1105     }
1106
1107     /* A parallel command line option consistency check that we can
1108        only do after any threads have started. */
1109     if (!PAR(cr)
1110         && (domdecOptions.numCells[XX] > 1 || domdecOptions.numCells[YY] > 1
1111             || domdecOptions.numCells[ZZ] > 1 || domdecOptions.numPmeRanks > 0))
1112     {
1113         gmx_fatal(FARGS,
1114                   "The -dd or -npme option requests a parallel simulation, "
1115 #if !GMX_MPI
1116                   "but %s was compiled without threads or MPI enabled",
1117                   output_env_get_program_display_name(oenv));
1118 #elif GMX_THREAD_MPI
1119                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
1120 #else
1121                   "but %s was not started through mpirun/mpiexec or only one rank was requested "
1122                   "through mpirun/mpiexec",
1123                   output_env_get_program_display_name(oenv));
1124 #endif
1125     }
1126
1127     if (doRerun && (EI_ENERGY_MINIMIZATION(inputrec->eI) || IntegrationAlgorithm::NM == inputrec->eI))
1128     {
1129         gmx_fatal(FARGS,
1130                   "The .mdp file specified an energy minimization or normal mode algorithm, and "
1131                   "these are not compatible with mdrun -rerun");
1132     }
1133
1134     /* NMR restraints must be initialized before load_checkpoint,
1135      * since with time averaging the history is added to t_state.
1136      * For proper consistency check we therefore need to extend
1137      * t_state here.
1138      * So the PME-only nodes (if present) will also initialize
1139      * the distance restraints.
1140      */
1141
1142     /* This needs to be called before read_checkpoint to extend the state */
1143     t_disresdata* disresdata;
1144     snew(disresdata, 1);
1145     init_disres(fplog,
1146                 mtop,
1147                 inputrec.get(),
1148                 DisResRunMode::MDRun,
1149                 MASTER(cr) ? DDRole::Master : DDRole::Agent,
1150                 PAR(cr) ? NumRanks::Multiple : NumRanks::Single,
1151                 cr->mpi_comm_mysim,
1152                 ms,
1153                 disresdata,
1154                 globalState.get(),
1155                 replExParams.exchangeInterval > 0);
1156
1157     std::unique_ptr<t_oriresdata> oriresData;
1158     if (gmx_mtop_ftype_count(mtop, F_ORIRES) > 0)
1159     {
1160         oriresData = std::make_unique<t_oriresdata>(fplog, mtop, *inputrec, cr, ms, globalState.get());
1161     }
1162
1163     auto deform = prepareBoxDeformation(globalState != nullptr ? globalState->box : box,
1164                                         MASTER(cr) ? DDRole::Master : DDRole::Agent,
1165                                         PAR(cr) ? NumRanks::Multiple : NumRanks::Single,
1166                                         cr->mpi_comm_mygroup,
1167                                         *inputrec);
1168
1169 #if GMX_FAHCORE
1170     /* We have to remember the generation's first step before reading checkpoint.
1171        This way, we can report to the F@H core both the generation's first step
1172        and the restored first step, thus making it able to distinguish between
1173        an interruption/resume and start of the n-th generation simulation.
1174        Having this information, the F@H core can correctly calculate and report
1175        the progress.
1176      */
1177     int gen_first_step = 0;
1178     if (MASTER(cr))
1179     {
1180         gen_first_step = inputrec->init_step;
1181     }
1182 #endif
1183
1184     ObservablesHistory observablesHistory = {};
1185
1186     auto modularSimulatorCheckpointData = std::make_unique<ReadCheckpointDataHolder>();
1187     if (startingBehavior != StartingBehavior::NewSimulation)
1188     {
1189         /* Check if checkpoint file exists before doing continuation.
1190          * This way we can use identical input options for the first and subsequent runs...
1191          */
1192         if (mdrunOptions.numStepsCommandline > -2)
1193         {
1194             /* Temporarily set the number of steps to unlimited to avoid
1195              * triggering the nsteps check in load_checkpoint().
1196              * This hack will go away soon when the -nsteps option is removed.
1197              */
1198             inputrec->nsteps = -1;
1199         }
1200
1201         // Finish applying initial simulation state information from external sources on all ranks.
1202         // Reconcile checkpoint file data with Mdrunner state established up to this point.
1203         applyLocalState(*inputHolder_.get(),
1204                         logFileHandle,
1205                         cr,
1206                         domdecOptions.numCells,
1207                         inputrec.get(),
1208                         globalState.get(),
1209                         &observablesHistory,
1210                         mdrunOptions.reproducible,
1211                         mdModules_->notifiers(),
1212                         modularSimulatorCheckpointData.get(),
1213                         useModularSimulator);
1214         // TODO: (#3652) Synchronize filesystem state, SimulationInput contents, and program
1215         //  invariants
1216         //  on all code paths.
1217         // Write checkpoint or provide hook to update SimulationInput.
1218         // If there was a checkpoint file, SimulationInput contains more information
1219         // than if there wasn't. At this point, we have synchronized the in-memory
1220         // state with the filesystem state only for restarted simulations. We should
1221         // be calling applyLocalState unconditionally and expect that the completeness
1222         // of SimulationInput is not dependent on its creation method.
1223
1224         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
1225         {
1226             // Now we can start normal logging to the truncated log file.
1227             fplog = gmx_fio_getfp(logFileHandle);
1228             prepareLogAppending(fplog);
1229             logOwner = buildLogger(fplog, MASTER(cr));
1230             mdlog    = logOwner.logger();
1231         }
1232     }
1233
1234 #if GMX_FAHCORE
1235     if (MASTER(cr))
1236     {
1237         fcRegisterSteps(inputrec->nsteps + inputrec->init_step, gen_first_step);
1238     }
1239 #endif
1240
1241     if (mdrunOptions.numStepsCommandline > -2)
1242     {
1243         GMX_LOG(mdlog.info)
1244                 .asParagraph()
1245                 .appendText(
1246                         "The -nsteps functionality is deprecated, and may be removed in a future "
1247                         "version. "
1248                         "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp "
1249                         "file field.");
1250     }
1251     /* override nsteps with value set on the commandline */
1252     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec.get());
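    // Note on the sentinel used in the checks above: "-nsteps not given" is stored as -2, so any
    // value > -2 means the user supplied -nsteps on the command line, including -1, which requests
    // an unlimited number of steps. A minimal sketch of the same convention (illustrative only;
    // c_nstepsNotSetSentinel is a hypothetical name, not part of this file):
    //
    //     constexpr int c_nstepsNotSetSentinel = -2;
    //     const bool    userSetNsteps = (mdrunOptions.numStepsCommandline > c_nstepsNotSetSentinel);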
1253
1254     if (isSimulationMasterRank)
1255     {
1256         copy_mat(globalState->box, box);
1257     }
1258
1259     if (PAR(cr))
1260     {
1261         gmx_bcast(sizeof(box), box, cr->mpiDefaultCommunicator);
1262     }
1263
1264     if (inputrec->cutoff_scheme != CutoffScheme::Verlet)
1265     {
1266         gmx_fatal(FARGS,
1267                   "This group-scheme .tpr file can no longer be run by mdrun. Please update to the "
1268                   "Verlet scheme, or use an earlier version of GROMACS if necessary.");
1269     }
1270     /* Update rlist and nstlist. */
1271     /* Note: prepare_verlet_scheme calls increaseNstlist(...), which (while attempting to
1272      * increase rlist) tries to check whether the newly chosen value fits with the DD scheme. As this
1273      * runs before any DD scheme is set up, that check is never executed. See #3334 for more details.
1274      */
1275     prepare_verlet_scheme(fplog,
1276                           cr,
1277                           inputrec.get(),
1278                           nstlist_cmdline,
1279                           mtop,
1280                           box,
1281                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
1282                           *hwinfo_->cpuInfo);
1283
1284     // We need to decide on update groups early, as this affects
1285     // inter-domain communication distances.
1286     auto       updateGroupingsPerMoleculeType = makeUpdateGroupingsPerMoleculeType(mtop);
1287     const real maxUpdateGroupRadius           = computeMaxUpdateGroupRadius(
1288             mtop, updateGroupingsPerMoleculeType, maxReferenceTemperature(*inputrec));
1289     const real   cutoffMargin = std::sqrt(max_cutoff2(inputrec->pbcType, box)) - inputrec->rlist;
1290     UpdateGroups updateGroups = makeUpdateGroups(mdlog,
1291                                                  std::move(updateGroupingsPerMoleculeType),
1292                                                  maxUpdateGroupRadius,
1293                                                  useDomainDecomposition,
1294                                                  systemHasConstraintsOrVsites(mtop),
1295                                                  cutoffMargin);
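    // For orientation: cutoffMargin above measures how much head-room the current rlist has before
    // it would exceed the largest cut-off the box allows. A small numeric sketch (illustrative
    // only, assuming a cubic box where the maximum cut-off is roughly half the box edge):
    //
    //     box edge L   = 6.0 nm   ->   sqrt(max_cutoff2) ~ 3.0 nm
    //     rlist        = 1.2 nm
    //     cutoffMargin ~ 3.0 nm - 1.2 nm = 1.8 nm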
1296
1297     // This builder is necessary while we have multi-part construction
1298     // of DD. Before DD is constructed, we use the existence of
1299     // the builder object to indicate that further construction of DD
1300     // is needed.
1301     std::unique_ptr<DomainDecompositionBuilder> ddBuilder;
1302     if (useDomainDecomposition)
1303     {
1304         ddBuilder = std::make_unique<DomainDecompositionBuilder>(
1305                 mdlog,
1306                 cr,
1307                 domdecOptions,
1308                 mdrunOptions,
1309                 mtop,
1310                 *inputrec,
1311                 mdModules_->notifiers(),
1312                 box,
1313                 updateGroups.updateGroupingPerMoleculeType(),
1314                 updateGroups.useUpdateGroups(),
1315                 updateGroups.maxUpdateGroupRadius(),
1316                 positionsFromStatePointer(globalState.get()),
1317                 useGpuForNonbonded,
1318                 useGpuForPme);
1319     }
1320     else
1321     {
1322         /* PME, if used, is done on all nodes with 1D decomposition */
1323         cr->nnodes     = cr->sizeOfDefaultCommunicator;
1324         cr->sim_nodeid = cr->rankInDefaultCommunicator;
1325         cr->nodeid     = cr->rankInDefaultCommunicator;
1326         cr->npmenodes  = 0;
1327         cr->duty       = (DUTY_PP | DUTY_PME);
1328
1329         if (inputrec->pbcType == PbcType::Screw)
1330         {
1331             gmx_fatal(FARGS, "pbc=screw is only implemented with domain decomposition");
1332         }
1333     }
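    // In the branch above that handles runs without domain decomposition, every rank carries both
    // duties, so both of the following hold on every rank (illustrative only):
    //
    //     thisRankHasDuty(cr, DUTY_PP);   // true: this rank computes the particle-particle part
    //     thisRankHasDuty(cr, DUTY_PME);  // true: this rank also computes the PME mesh part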
1334
1335     // Produce the task assignment for this rank - done after DD is constructed
1336     GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build(
1337             availableDevices,
1338             userGpuTaskAssignment,
1339             *hwinfo_,
1340             simulationCommunicator,
1341             physicalNodeComm,
1342             nonbondedTarget,
1343             pmeTarget,
1344             bondedTarget,
1345             updateTarget,
1346             useGpuForNonbonded,
1347             useGpuForPme,
1348             thisRankHasDuty(cr, DUTY_PP),
1349             // TODO cr->duty & DUTY_PME should imply that a PME
1350             // algorithm is active, but currently does not.
1351             EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
1352
1353     // Get the device handles for the modules, nullptr when no task is assigned.
1354     int                deviceId   = -1;
1355     DeviceInformation* deviceInfo = gpuTaskAssignments.initDevice(&deviceId);
1356
1357     // Decide whether to enable GPU task timing - TODO: put this in gpu_utils (even though generally this is just option handling?)
1358     bool useTiming = true;
1359
1360     if (GMX_GPU_CUDA)
1361     {
1362         /* WARNING: CUDA timings are incorrect with multiple streams.
1363          *          This is the main reason why they are disabled by default.
1364          */
1365         // TODO: Consider turning this on by default when we can detect the number of streams.
1366         useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
1367     }
1368     else if (GMX_GPU_OPENCL)
1369     {
1370         useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
1371     }
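    // The two environment toggles above follow opposite defaults: CUDA timing is opt-in via
    // GMX_ENABLE_GPU_TIMING, while OpenCL timing is opt-out via GMX_DISABLE_GPU_TIMING. A minimal
    // sketch of the same pattern (illustrative only; envFlagIsSet is a hypothetical helper, not
    // part of this file):
    //
    //     const auto envFlagIsSet = [](const char* name) { return std::getenv(name) != nullptr; };
    //     const bool cudaTiming   = envFlagIsSet("GMX_ENABLE_GPU_TIMING");    // off unless set
    //     const bool openclTiming = !envFlagIsSet("GMX_DISABLE_GPU_TIMING");  // on unless set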
1372
1373     // TODO Currently this is always built, yet DD partition code
1374     // checks if it is built before using it. Probably it should
1375     // become an MDModule that is made only when another module
1376     // requires it (e.g. pull, CompEl, density fitting), so that we
1377     // don't update the local atom sets unilaterally every step.
1378     LocalAtomSetManager atomSets;
1379     if (ddBuilder)
1380     {
1381         // TODO Pass the GPU streams to ddBuilder to use in buffer
1382         // transfers (e.g. halo exchange)
1383         cr->dd = ddBuilder->build(&atomSets);
1384         // The builder's job is done, so destruct it
1385         ddBuilder.reset(nullptr);
1386         // Note that local state still does not exist yet.
1387     }
1388
1389     // The GPU update is decided here because we need to know whether the constraints or
1390     // SETTLEs can span across the domain borders (i.e. whether or not update groups are
1391     // defined). This is only known after DD is initialized, hence the decision on using the GPU
1392     // update is made this late.
1393     try
1394     {
1395         const bool haveFrozenAtoms = inputrecFrozenAtoms(inputrec.get());
1396
1397         useGpuForUpdate = decideWhetherToUseGpuForUpdate(useDomainDecomposition,
1398                                                          updateGroups.useUpdateGroups(),
1399                                                          pmeRunMode,
1400                                                          domdecOptions.numPmeRanks > 0,
1401                                                          useGpuForNonbonded,
1402                                                          updateTarget,
1403                                                          gpusWereDetected,
1404                                                          *inputrec,
1405                                                          mtop,
1406                                                          doEssentialDynamics,
1407                                                          gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
1408                                                          haveFrozenAtoms,
1409                                                          doRerun,
1410                                                          devFlags,
1411                                                          mdlog);
1412     }
1413     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1414
1415     const bool printHostName = (cr->nnodes > 1);
1416     gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
1417
1418     const bool disableNonbondedCalculation = (getenv("GMX_NO_NONBONDED") != nullptr);
1419     if (disableNonbondedCalculation)
1420     {
1421         /* turn off non-bonded calculations */
1422         GMX_LOG(mdlog.warning)
1423                 .asParagraph()
1424                 .appendText(
1425                         "Found environment variable GMX_NO_NONBONDED.\n"
1426                         "Disabling nonbonded calculations.");
1427     }
1428
1429     MdrunScheduleWorkload runScheduleWork;
1430
1431     bool useGpuDirectHalo = decideWhetherToUseGpuForHalo(devFlags,
1432                                                          havePPDomainDecomposition(cr),
1433                                                          useGpuForNonbonded,
1434                                                          useModularSimulator,
1435                                                          doRerun,
1436                                                          EI_ENERGY_MINIMIZATION(inputrec->eI));
1437
1438     // Also populates the simulation constant workload description.
1439     runScheduleWork.simulationWork = createSimulationWorkload(*inputrec,
1440                                                               disableNonbondedCalculation,
1441                                                               devFlags,
1442                                                               useGpuForNonbonded,
1443                                                               pmeRunMode,
1444                                                               useGpuForBonded,
1445                                                               useGpuForUpdate,
1446                                                               useGpuDirectHalo);
1447
1448     std::unique_ptr<DeviceStreamManager> deviceStreamManager = nullptr;
1449
1450     if (deviceInfo != nullptr)
1451     {
1452         if (DOMAINDECOMP(cr) && thisRankHasDuty(cr, DUTY_PP))
1453         {
1454             dd_setup_dlb_resource_sharing(cr, deviceId);
1455         }
1456         deviceStreamManager = std::make_unique<DeviceStreamManager>(
1457                 *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming);
1458     }
1459
1460     // If the user chose a task assignment, give them some hints
1461     // where appropriate.
1462     if (!userGpuTaskAssignment.empty())
1463     {
1464         gpuTaskAssignments.logPerformanceHints(mdlog, numAvailableDevices);
1465     }
1466
1467     if (PAR(cr))
1468     {
1469         /* After possible communicator splitting in make_dd_communicators,
1470          * we can set up the intra-/inter-node communication.
1471          */
1472         gmx_setup_nodecomm(fplog, cr);
1473     }
1474
1475 #if GMX_MPI
1476     if (isMultiSim(ms))
1477     {
1478         GMX_LOG(mdlog.warning)
1479                 .asParagraph()
1480                 .appendTextFormatted(
1481                         "This is simulation %d out of %d running as a composite GROMACS\n"
1482                         "multi-simulation job. Setup for this simulation:\n",
1483                         ms->simulationIndex_,
1484                         ms->numSimulations_);
1485     }
1486     GMX_LOG(mdlog.warning)
1487             .appendTextFormatted("Using %d MPI %s\n",
1488                                  cr->nnodes,
1489 #    if GMX_THREAD_MPI
1490                                  cr->nnodes == 1 ? "thread" : "threads"
1491 #    else
1492                                  cr->nnodes == 1 ? "process" : "processes"
1493 #    endif
1494             );
1495     fflush(stderr);
1496 #endif
1497
1498     // mdrun -pin auto honors any affinity setting that already
1499     // exists. If one exists, it is nice to provide feedback about whether
1500     // that existing affinity setting came from OpenMP or something
1501     // else, so we run this code both before and after we initialize
1502     // the OpenMP support.
1503     gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, FALSE);
1504     /* Check and update the number of OpenMP threads requested */
1505     checkAndUpdateRequestedNumOpenmpThreads(
1506             &hw_opt, *hwinfo_, cr, ms, physicalNodeComm.size_, pmeRunMode, mtop, *inputrec);
1507
1508     gmx_omp_nthreads_init(mdlog,
1509                           cr,
1510                           hwinfo_->nthreads_hw_avail,
1511                           physicalNodeComm.size_,
1512                           hw_opt.nthreads_omp,
1513                           hw_opt.nthreads_omp_pme,
1514                           !thisRankHasDuty(cr, DUTY_PP));
1515
1516     const bool bEnableFPE = gmxShouldEnableFPExceptions();
1517     // FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1518     if (bEnableFPE)
1519     {
1520         gmx_feenableexcept();
1521     }
1522
1523     /* Now that we know the setup is consistent, check for efficiency */
1524     check_resource_division_efficiency(
1525             hwinfo_, gpuTaskAssignments.thisRankHasAnyGpuTask(), mdrunOptions.ntompOptionIsSet, cr, mdlog);
1526
1527     /* Get the number of PP/PME threads on this MPI / tMPI rank.
1528        PME: the environment variable should be read on only one rank to make sure it is
1529        identical everywhere.
1530      */
1531     const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP)
1532                                              ? gmx_omp_nthreads_get(ModuleMultiThread::Nonbonded)
1533                                              : gmx_omp_nthreads_get(ModuleMultiThread::Pme);
1534     checkHardwareOversubscription(
1535             numThreadsOnThisRank, cr->nodeid, *hwinfo_->hardwareTopology, physicalNodeComm, mdlog);
1536
1537     // Enable peer access between GPUs where available.
1538     // Only needed with DD; only the master PP rank needs to perform the setup, and only if thread-MPI
1539     // plus any of the GPU communication features are active.
1540     if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI
1541         && (runScheduleWork.simulationWork.useGpuHaloExchange
1542             || runScheduleWork.simulationWork.useGpuPmePpCommunication))
1543     {
1544         setupGpuDevicePeerAccess(gpuTaskAssignments.deviceIdsAssigned(), mdlog);
1545     }
1546
1547     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1548     {
1549         /* Before setting affinity, check whether the affinity has changed
1550          * since we first checked (which would indicate that the OpenMP
1551          * library has probably changed it).
1552          */
1553         gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, TRUE);
1554
1555         int numThreadsOnThisNode, intraNodeThreadOffset;
1556         analyzeThreadsOnThisNode(
1557                 physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode, &intraNodeThreadOffset);
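        // A small worked example of the quantities just computed (illustrative only): with two
        // PP ranks sharing one physical node and 4 OpenMP threads each,
        //
        //     numThreadsOnThisRank  = 4 on both ranks
        //     numThreadsOnThisNode  = 8
        //     intraNodeThreadOffset = 0 on the first rank and 4 on the second,
        //
        // which lets the affinity code below lay out the node's threads without overlap.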
1558
1559         /* Set the CPU affinity */
1560         gmx_set_thread_affinity(mdlog,
1561                                 cr,
1562                                 &hw_opt,
1563                                 *hwinfo_->hardwareTopology,
1564                                 numThreadsOnThisRank,
1565                                 numThreadsOnThisNode,
1566                                 intraNodeThreadOffset,
1567                                 nullptr);
1568     }
1569
1570     if (mdrunOptions.timingOptions.resetStep > -1)
1571     {
1572         GMX_LOG(mdlog.info)
1573                 .asParagraph()
1574                 .appendText(
1575                         "The -resetstep functionality is deprecated, and may be removed in a "
1576                         "future version.");
1577     }
1578     std::unique_ptr<gmx_wallcycle> wcycle =
1579             wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1580
1581     if (PAR(cr))
1582     {
1583         /* The master rank synchronizes its value of reset_counters with all nodes,
1584          * including PME-only nodes */
1585         int64_t reset_counters = wcycle_get_reset_counters(wcycle.get());
1586         gmx_bcast(sizeof(reset_counters), &reset_counters, cr->mpi_comm_mysim);
1587         wcycle_set_reset_counters(wcycle.get(), reset_counters);
1588     }
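    // The broadcast above follows the same pattern used elsewhere in this function: the master
    // rank's value is sent to all ranks of the simulation communicator. A minimal sketch of the
    // call shape (illustrative only):
    //
    //     int64_t value = ...;                                  // meaningful on the master rank
    //     gmx_bcast(sizeof(value), &value, cr->mpi_comm_mysim); // now identical on every rank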
1589
1590     // Membrane embedding must be initialized before we call init_forcerec()
1591     membedHolder.initializeMembed(fplog,
1592                                   filenames.size(),
1593                                   filenames.data(),
1594                                   &mtop,
1595                                   inputrec.get(),
1596                                   globalState.get(),
1597                                   cr,
1598                                   &mdrunOptions.checkpointOptions.period);
1599
1600     const bool               thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
1601     std::unique_ptr<MDAtoms> mdAtoms;
1602     std::unique_ptr<VirtualSitesHandler> vsite;
1603     std::unique_ptr<ListedForcesGpu>     listedForcesGpu;
1604
1605     t_nrnb nrnb;
1606     if (thisRankHasDuty(cr, DUTY_PP))
1607     {
1608         setupNotifier.notify(*cr);
1609         setupNotifier.notify(&atomSets);
1610         setupNotifier.notify(mtop);
1611         setupNotifier.notify(inputrec->pbcType);
1612         setupNotifier.notify(SimulationTimeStep{ inputrec->delta_t });
1613         /* Initiate forcerecord */
1614         fr                 = std::make_unique<t_forcerec>();
1615         fr->forceProviders = mdModules_->initForceProviders();
1616         init_forcerec(fplog,
1617                       mdlog,
1618                       fr.get(),
1619                       *inputrec,
1620                       mtop,
1621                       cr,
1622                       box,
1623                       opt2fn("-table", filenames.size(), filenames.data()),
1624                       opt2fn("-tablep", filenames.size(), filenames.data()),
1625                       opt2fns("-tableb", filenames.size(), filenames.data()),
1626                       pforce);
1627         // Dirty hack; to fix this properly, disres and orires should be made MDModules
1628         fr->fcdata->disres = disresdata;
1629         fr->fcdata->orires.swap(oriresData);
1630
1631         // Save a handle to the device stream manager to use elsewhere in the code.
1632         // TODO: Forcerec is not a correct place to store it.
1633         fr->deviceStreamManager = deviceStreamManager.get();
1634
1635         if (runScheduleWork.simulationWork.useGpuPmePpCommunication && !thisRankHasDuty(cr, DUTY_PME))
1636         {
1637             GMX_RELEASE_ASSERT(
1638                     deviceStreamManager != nullptr,
1639                     "GPU device stream manager should be valid in order to use PME-PP direct "
1640                     "communications.");
1641             GMX_RELEASE_ASSERT(
1642                     deviceStreamManager->streamIsValid(DeviceStreamType::PmePpTransfer),
1643                     "GPU PP-PME stream should be valid in order to use GPU PME-PP direct "
1644                     "communications.");
1645             fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(
1646                     cr->mpi_comm_mysim,
1647                     cr->dd->pme_nodeid,
1648                     deviceStreamManager->context(),
1649                     deviceStreamManager->stream(DeviceStreamType::PmePpTransfer));
1650         }
1651
1652         fr->nbv = Nbnxm::init_nb_verlet(mdlog,
1653                                         *inputrec,
1654                                         *fr,
1655                                         cr,
1656                                         *hwinfo_,
1657                                         runScheduleWork.simulationWork.useGpuNonbonded,
1658                                         deviceStreamManager.get(),
1659                                         mtop,
1660                                         box,
1661                                         wcycle.get());
1662         // TODO: Move the logic below to a GPU bonded builder
1663         if (runScheduleWork.simulationWork.useGpuBonded)
1664         {
1665             GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
1666                                "GPU device stream manager should be valid in order to use GPU "
1667                                "version of bonded forces.");
1668             listedForcesGpu = std::make_unique<ListedForcesGpu>(
1669                     mtop.ffparams,
1670                     fr->ic->epsfac * fr->fudgeQQ,
1671                     deviceStreamManager->context(),
1672                     deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)),
1673                     wcycle.get());
1674             fr->listedForcesGpu = listedForcesGpu.get();
1675         }
1676
1677         /* Initialize the mdAtoms structure.
1678          * mdAtoms is not filled with atom data,
1679          * as this cannot be done yet when domain decomposition is used.
1680          */
1681         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1682         if (globalState && thisRankHasPmeGpuTask)
1683         {
1684             // The pinning of coordinates in the global state object works, because we only use
1685             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1686             // points to the global state object without DD.
1687             // FIXME: MD and EM separately set up the local state - this should happen in the same
1688             // function, which should also perform the pinning.
1689             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1690         }
1691
1692         /* Initialize the virtual site communication */
1693         vsite = makeVirtualSitesHandler(
1694                 mtop, cr, fr->pbcType, updateGroups.updateGroupingPerMoleculeType());
1695
1696         calc_shifts(box, fr->shift_vec);
1697
1698         /* With periodic molecules the charge groups should be whole at start up
1699          * and the virtual sites should not be far from their proper positions.
1700          */
1701         if (!inputrec->bContinuation && MASTER(cr)
1702             && !(inputrec->pbcType != PbcType::No && inputrec->bPeriodicMols))
1703         {
1704             /* Make molecules whole at start of run */
1705             if (fr->pbcType != PbcType::No)
1706             {
1707                 do_pbc_first_mtop(fplog, inputrec->pbcType, box, &mtop, globalState->x.rvec_array());
1708             }
1709             if (vsite)
1710             {
1711                 /* Correct initial vsite positions are required
1712                  * for the initial distribution in the domain decomposition
1713                  * and for the initial shell prediction.
1714                  */
1715                 constructVirtualSitesGlobal(mtop, globalState->x);
1716             }
1717         }
1718         // Make the DD reverse topology, now that any vsites that are present are available
1719         if (DOMAINDECOMP(cr))
1720         {
1721             dd_make_reverse_top(fplog, cr->dd, mtop, vsite.get(), *inputrec, domdecOptions.ddBondedChecking);
1722         }
1723
1724         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1725         {
1726             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1727             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1728         }
1729     }
1730     else
1731     {
1732         /* This is a PME-only rank */
1733
1734         GMX_ASSERT(globalState == nullptr,
1735                    "We don't need the state on a PME-only rank and expect it to be uninitialized");
1736
1737         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1738         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1739     }
1740
1741     gmx_pme_t* sepPmeData = nullptr;
1742     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1743     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr),
1744                "Double-checking that only PME-only ranks have no forcerec");
1745     gmx_pme_t*& pmedata = fr ? fr->pmedata : sepPmeData;
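    // The reference-to-pointer idiom above lets the code below assign to pmedata without caring
    // which object owns the storage. A minimal sketch of the same idiom (illustrative only, with
    // hypothetical names):
    //
    //     bool condition = true; // pick the first owner
    //     int  ownedByA = 0, ownedByB = 0;
    //     int& target   = condition ? ownedByA : ownedByB;
    //     target        = 42; // writes into whichever object was selected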
1746
1747     // TODO: this should live in the ewald module once its testing is improved
1748     //
1749     // Later, this program could contain kernels that are
1750     // re-used as auto-tuning progresses, or as subsequent simulations
1751     // are invoked.
1752     PmeGpuProgramStorage pmeGpuProgram;
1753     if (thisRankHasPmeGpuTask)
1754     {
1755         GMX_RELEASE_ASSERT(
1756                 (deviceStreamManager != nullptr),
1757                 "GPU device stream manager should be initialized in order to use GPU for PME.");
1758         GMX_RELEASE_ASSERT((deviceInfo != nullptr),
1759                            "GPU device should be initialized in order to use GPU for PME.");
1760         pmeGpuProgram = buildPmeGpuProgram(deviceStreamManager->context());
1761     }
1762
1763     /* Initiate PME if necessary,
1764      * either on all nodes or on dedicated PME nodes only. */
1765     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1766     {
1767         if (mdAtoms && mdAtoms->mdatoms())
1768         {
1769             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1770             if (EVDW_PME(inputrec->vdwtype))
1771             {
1772                 nTypePerturbed = mdAtoms->mdatoms()->nTypePerturbed;
1773             }
1774         }
1775         if (cr->npmenodes > 0)
1776         {
1777             /* The PME-only nodes need to know nChargePerturbed (FEP on Q) and nTypePerturbed (FEP on LJ) */
1778             gmx_bcast(sizeof(nChargePerturbed), &nChargePerturbed, cr->mpi_comm_mysim);
1779             gmx_bcast(sizeof(nTypePerturbed), &nTypePerturbed, cr->mpi_comm_mysim);
1780         }
1781
1782         if (thisRankHasDuty(cr, DUTY_PME))
1783         {
1784             try
1785             {
1786                 // TODO: This should be in the builder.
1787                 GMX_RELEASE_ASSERT(!runScheduleWork.simulationWork.useGpuPme
1788                                            || (deviceStreamManager != nullptr),
1789                                    "Device stream manager should be valid in order to use GPU "
1790                                    "version of PME.");
1791                 GMX_RELEASE_ASSERT(
1792                         !runScheduleWork.simulationWork.useGpuPme
1793                                 || deviceStreamManager->streamIsValid(DeviceStreamType::Pme),
1794                         "GPU PME stream should be valid in order to use GPU version of PME.");
1795
1796                 const DeviceContext* deviceContext = runScheduleWork.simulationWork.useGpuPme
1797                                                              ? &deviceStreamManager->context()
1798                                                              : nullptr;
1799                 const DeviceStream*  pmeStream =
1800                         runScheduleWork.simulationWork.useGpuPme
1801                                  ? &deviceStreamManager->stream(DeviceStreamType::Pme)
1802                                  : nullptr;
1803
1804                 pmedata = gmx_pme_init(cr,
1805                                        getNumPmeDomains(cr->dd),
1806                                        inputrec.get(),
1807                                        nChargePerturbed != 0,
1808                                        nTypePerturbed != 0,
1809                                        mdrunOptions.reproducible,
1810                                        ewaldcoeff_q,
1811                                        ewaldcoeff_lj,
1812                                        gmx_omp_nthreads_get(ModuleMultiThread::Pme),
1813                                        pmeRunMode,
1814                                        nullptr,
1815                                        deviceContext,
1816                                        pmeStream,
1817                                        pmeGpuProgram.get(),
1818                                        mdlog);
1819             }
1820             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
1821         }
1822     }
1823
1824
1825     if (EI_DYNAMICS(inputrec->eI))
1826     {
1827         /* Turn on signal handling on all nodes.
1828          *
1829          * A user signal from the PME nodes (if any)
1830          * is communicated to the PP nodes.
1831          */
1832         signal_handler_install();
1833     }
1834
1835     pull_t* pull_work = nullptr;
1836     if (thisRankHasDuty(cr, DUTY_PP))
1837     {
1838         /* Assumes uniform use of the number of OpenMP threads */
1839         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(ModuleMultiThread::Default));
1840
1841         if (inputrec->bPull)
1842         {
1843             /* Initialize pull code */
1844             pull_work = init_pull(fplog,
1845                                   inputrec->pull.get(),
1846                                   inputrec.get(),
1847                                   mtop,
1848                                   cr,
1849                                   &atomSets,
1850                                   inputrec->fepvals->init_lambda);
1851             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1852             {
1853                 initPullHistory(pull_work, &observablesHistory);
1854             }
1855             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1856             {
1857                 init_pull_output_files(pull_work, filenames.size(), filenames.data(), oenv, startingBehavior);
1858             }
1859         }
1860
1861         std::unique_ptr<EnforcedRotation> enforcedRotation;
1862         if (inputrec->bRot)
1863         {
1864             /* Initialize enforced rotation code */
1865             enforcedRotation = init_rot(fplog,
1866                                         inputrec.get(),
1867                                         filenames.size(),
1868                                         filenames.data(),
1869                                         cr,
1870                                         &atomSets,
1871                                         globalState.get(),
1872                                         mtop,
1873                                         oenv,
1874                                         mdrunOptions,
1875                                         startingBehavior);
1876         }
1877
1878         t_swap* swap = nullptr;
1879         if (inputrec->eSwapCoords != SwapType::No)
1880         {
1881             /* Initialize ion swapping code */
1882             swap = init_swapcoords(fplog,
1883                                    inputrec.get(),
1884                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1885                                    mtop,
1886                                    globalState.get(),
1887                                    &observablesHistory,
1888                                    cr,
1889                                    &atomSets,
1890                                    oenv,
1891                                    mdrunOptions,
1892                                    startingBehavior);
1893         }
1894
1895         /* Let makeConstraints know whether we have essential dynamics constraints. */
1896         auto constr = makeConstraints(mtop,
1897                                       *inputrec,
1898                                       pull_work,
1899                                       doEssentialDynamics,
1900                                       fplog,
1901                                       cr,
1902                                       updateGroups.useUpdateGroups(),
1903                                       ms,
1904                                       &nrnb,
1905                                       wcycle.get(),
1906                                       fr->bMolPBC);
1907
1908         /* Energy terms and groups */
1909         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
1910                              inputrec->fepvals->n_lambda);
1911
1912         // cos acceleration is only supported by md, but older tpr
1913         // files might still combine it with other integrators
1914         GMX_RELEASE_ASSERT(inputrec->cos_accel == 0.0 || inputrec->eI == IntegrationAlgorithm::MD,
1915                            "cos_acceleration is only supported by integrator=md");
1916
1917         /* Kinetic energy data */
1918         gmx_ekindata_t ekind(inputrec->opts.ngtc,
1919                              inputrec->cos_accel,
1920                              gmx_omp_nthreads_get(ModuleMultiThread::Update));
1921
1922         /* Set up interactive MD (IMD) */
1923         auto imdSession = makeImdSession(inputrec.get(),
1924                                          cr,
1925                                          wcycle.get(),
1926                                          &enerd,
1927                                          ms,
1928                                          mtop,
1929                                          mdlog,
1930                                          MASTER(cr) ? globalState->x : gmx::ArrayRef<gmx::RVec>(),
1931                                          filenames.size(),
1932                                          filenames.data(),
1933                                          oenv,
1934                                          mdrunOptions.imdOptions,
1935                                          startingBehavior);
1936
1937         if (DOMAINDECOMP(cr))
1938         {
1939             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1940             /* This call is not included in init_domain_decomposition
1941              * because fr->atomInfoForEachMoleculeBlock is set later.
1942              */
1943             makeBondedLinks(cr->dd, mtop, fr->atomInfoForEachMoleculeBlock);
1944         }
1945
1946         if (runScheduleWork.simulationWork.useGpuBufferOps)
1947         {
1948             fr->gpuForceReduction[gmx::AtomLocality::Local] = std::make_unique<gmx::GpuForceReduction>(
1949                     deviceStreamManager->context(),
1950                     deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal),
1951                     wcycle.get());
1952             fr->gpuForceReduction[gmx::AtomLocality::NonLocal] = std::make_unique<gmx::GpuForceReduction>(
1953                     deviceStreamManager->context(),
1954                     deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal),
1955                     wcycle.get());
1956         }
1957
1958         std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
1959         if (gpusWereDetected
1960             && ((runScheduleWork.simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME))
1961                 || runScheduleWork.simulationWork.useGpuBufferOps))
1962         {
1963             GpuApiCallBehavior transferKind =
1964                     (inputrec->eI == IntegrationAlgorithm::MD && !doRerun && !useModularSimulator)
1965                             ? GpuApiCallBehavior::Async
1966                             : GpuApiCallBehavior::Sync;
1967             GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
1968                                "GPU device stream manager should be initialized to use GPU.");
1969             stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
1970                     *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle.get());
1971             fr->stateGpu = stateGpu.get();
1972         }
1973
1974         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1975         SimulatorBuilder simulatorBuilder;
1976
1977         simulatorBuilder.add(SimulatorStateData(globalState.get(), &observablesHistory, &enerd, &ekind));
1978         simulatorBuilder.add(std::move(membedHolder));
1979         simulatorBuilder.add(std::move(stopHandlerBuilder_));
1980         simulatorBuilder.add(SimulatorConfig(mdrunOptions, startingBehavior, &runScheduleWork));
1981
1982
1983         simulatorBuilder.add(SimulatorEnv(fplog, cr, ms, mdlog, oenv));
1984         simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle.get()));
1985         simulatorBuilder.add(ConstraintsParam(
1986                 constr.get(), enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr, vsite.get()));
1987         // TODO: Separate `fr` to a separate add, and make the `build` handle the coupling sensibly.
1988         simulatorBuilder.add(LegacyInput(
1989                 static_cast<int>(filenames.size()), filenames.data(), inputrec.get(), fr.get()));
1990         simulatorBuilder.add(ReplicaExchangeParameters(replExParams));
1991         simulatorBuilder.add(InteractiveMD(imdSession.get()));
1992         simulatorBuilder.add(SimulatorModules(mdModules_->outputProvider(), mdModules_->notifiers()));
1993         simulatorBuilder.add(CenterOfMassPulling(pull_work));
1994         // TODO: move to an MDModule
1995         simulatorBuilder.add(IonSwapping(swap));
1996         simulatorBuilder.add(TopologyData(mtop, mdAtoms.get()));
1997         simulatorBuilder.add(BoxDeformationHandle(deform.get()));
1998         simulatorBuilder.add(std::move(modularSimulatorCheckpointData));
1999
2000         // Build and run the simulator object based on user input
2001         auto simulator = simulatorBuilder.build(useModularSimulator);
2002         simulator->run();
2003
2004         if (fr->pmePpCommGpu)
2005         {
2006             // destroy object since it is no longer required. (This needs to be done while the GPU context still exists.)
2007             fr->pmePpCommGpu.reset();
2008         }
2009
2010         if (inputrec->bPull)
2011         {
2012             finish_pull(pull_work);
2013         }
2014         finish_swapcoords(swap);
2015     }
2016     else
2017     {
2018         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
2019         /* do PME only */
2020         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(ModuleMultiThread::Pme));
2021         gmx_pmeonly(pmedata,
2022                     cr,
2023                     &nrnb,
2024                     wcycle.get(),
2025                     walltime_accounting,
2026                     inputrec.get(),
2027                     pmeRunMode,
2028                     runScheduleWork.simulationWork.useGpuPmePpCommunication,
2029                     deviceStreamManager.get());
2030     }
2031
2032     wallcycle_stop(wcycle.get(), WallCycleCounter::Run);
2033
2034     /* Finish up, write some stuff;
2035      * if rerunMD, don't write the last frame again.
2036      */
2037     finish_run(fplog,
2038                mdlog,
2039                cr,
2040                *inputrec,
2041                &nrnb,
2042                wcycle.get(),
2043                walltime_accounting,
2044                fr ? fr->nbv.get() : nullptr,
2045                pmedata,
2046                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
2047
2048
2049     deviceStreamManager.reset(nullptr);
2050     // Free PME data
2051     if (pmedata)
2052     {
2053         gmx_pme_destroy(pmedata);
2054         pmedata = nullptr;
2055     }
2056
2057     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
2058     // before we destroy the GPU context(s)
2059     // Pinned buffers are associated with contexts in CUDA.
2060     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
2061     mdAtoms.reset(nullptr);
2062     globalState.reset(nullptr);
2063     mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU
2064     listedForcesGpu.reset(nullptr);
2065     fr.reset(nullptr); // destruct forcerec before gpu
2066     // TODO convert to C++ so we can get rid of these frees
2067     sfree(disresdata);
2068
2069     if (!hwinfo_->deviceInfoList.empty())
2070     {
2071         /* stop the GPU profiler (only CUDA) */
2072         stopGpuProfiler();
2073     }
2074
2075     /* With tMPI we need to wait for all ranks to finish deallocation before
2076      * destroying the CUDA context as some tMPI ranks may be sharing
2077      * GPU and context.
2078      *
2079      * This is not a concern in OpenCL where we use one context per rank.
2080      *
2081      * Note: it is safe to not call the barrier on the ranks which do not use GPU,
2082      * but it is easier and more futureproof to call it on the whole node.
2083      *
2084      * Note that this function needs to be called even if GPUs are not used
2085      * in this run because the PME ranks have no knowledge of whether GPUs
2086      * are used or not, but all ranks need to enter the barrier below.
2087      * \todo Remove this physical node barrier after making sure
2088      * that it's not needed anymore (with a shared GPU run).
2089      */
2090     if (GMX_THREAD_MPI)
2091     {
2092         physicalNodeComm.barrier();
2093     }
2094
2095     if (!devFlags.usingCudaAwareMpi)
2096     {
2097         // Don't reset the GPU when using CUDA-aware MPI:
2098         // UCX creates CUDA buffers which are cleaned up as part of MPI_Finalize(),
2099         // and resetting the device before MPI_Finalize() results in crashes inside UCX.
2100         releaseDevice(deviceInfo);
2101     }
2102
2103     /* Does what it says */
2104     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
2105     walltime_accounting_destroy(walltime_accounting);
2106
2107     // Ensure log file content is written
2108     if (logFileHandle)
2109     {
2110         gmx_fio_flush(logFileHandle);
2111     }
2112
2113     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
2114      * exceptions were enabled before function was called. */
2115     if (bEnableFPE)
2116     {
2117         gmx_fedisableexcept();
2118     }
2119
2120     auto rc = static_cast<int>(gmx_get_stop_condition());
2121
2122 #if GMX_THREAD_MPI
2123     /* we need to join all threads. The sub-threads join when they
2124        exit this function, but the master thread needs to be told to
2125        wait for that. */
2126     if (MASTER(cr))
2127     {
2128         tMPI_Finalize();
2129     }
2130 #endif
2131     return rc;
2132 } // end of Mdrunner::mdrunner()
2133
2134 Mdrunner::~Mdrunner()
2135 {
2136     // Clean up of the Manager.
2137     // This will end up getting called on every thread-MPI rank, which is unnecessary,
2138     // but okay as long as threads synchronize some time before adding or accessing
2139     // a new set of restraints.
2140     if (restraintManager_)
2141     {
2142         restraintManager_->clear();
2143         GMX_ASSERT(restraintManager_->countRestraints() == 0,
2144                    "Restraints added during the runner lifetime should be cleared at runner "
2145                    "destruction.");
2146     }
2147 }
2148
2149 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller, const std::string& name)
2150 {
2151     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
2152     // Not sure if this should be logged through the md logger or something else,
2153     // but it is helpful to have some sort of INFO level message sent somewhere.
2154     //    std::cout << "Registering restraint named " << name << std::endl;
2155
2156     // When multiple restraints are used, it may be wasteful to register them separately.
2157     // Maybe instead register an entire Restraint Manager as a force provider.
2158     restraintManager_->addToSpec(std::move(puller), name);
2159 }
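// A minimal sketch of how client code can use addPotential() before running (illustrative only;
// MyRestraint is a hypothetical class implementing gmx::IRestraintPotential):
//
//     auto restraint = std::make_shared<MyRestraint>(/* ...construction parameters... */);
//     runner.addPotential(restraint, "my-named-restraint");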
2160
2161 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules) : mdModules_(std::move(mdModules)) {}
2162
2163 Mdrunner::Mdrunner(Mdrunner&&) noexcept = default;
2164
2165 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265 in CentOS 7
2166 Mdrunner& Mdrunner::operator=(Mdrunner&& /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
2167
2168 class Mdrunner::BuilderImplementation
2169 {
2170 public:
2171     BuilderImplementation() = delete;
2172     BuilderImplementation(std::unique_ptr<MDModules> mdModules, compat::not_null<SimulationContext*> context);
2173     ~BuilderImplementation();
2174
2175     BuilderImplementation& setExtraMdrunOptions(const MdrunOptions& options,
2176                                                 real                forceWarningThreshold,
2177                                                 StartingBehavior    startingBehavior);
2178
2179     void addHardwareDetectionResult(const gmx_hw_info_t* hwinfo);
2180
2181     void addDomdec(const DomdecOptions& options);
2182
2183     void addInput(SimulationInputHandle inputHolder);
2184
2185     void addVerletList(int nstlist);
2186
2187     void addReplicaExchange(const ReplicaExchangeParameters& params);
2188
2189     void addNonBonded(const char* nbpu_opt);
2190
2191     void addPME(const char* pme_opt_, const char* pme_fft_opt_);
2192
2193     void addBondedTaskAssignment(const char* bonded_opt);
2194
2195     void addUpdateTaskAssignment(const char* update_opt);
2196
2197     void addHardwareOptions(const gmx_hw_opt_t& hardwareOptions);
2198
2199     void addFilenames(ArrayRef<const t_filenm> filenames);
2200
2201     void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
2202
2203     void addLogFile(t_fileio* logFileHandle);
2204
2205     void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
2206
2207     Mdrunner build();
2208
2209 private:
2210     // Default parameters copied from runner.h
2211     // \todo Clarify source(s) of default parameters.
2212
2213     const char* nbpu_opt_    = nullptr;
2214     const char* pme_opt_     = nullptr;
2215     const char* pme_fft_opt_ = nullptr;
2216     const char* bonded_opt_  = nullptr;
2217     const char* update_opt_  = nullptr;
2218
2219     MdrunOptions mdrunOptions_;
2220
2221     DomdecOptions domdecOptions_;
2222
2223     ReplicaExchangeParameters replicaExchangeParameters_;
2224
2225     //! Command-line override for the duration of a neighbor list with the Verlet scheme.
2226     int nstlist_ = 0;
2227
2228     //! World communicator, used for hardware detection and task assignment
2229     MPI_Comm libraryWorldCommunicator_ = MPI_COMM_NULL;
2230
2231     //! Multisim communicator handle.
2232     gmx_multisim_t* multiSimulation_;
2233
2234     //! mdrun communicator
2235     MPI_Comm simulationCommunicator_ = MPI_COMM_NULL;
2236
2237     //! Print a warning if any force is larger than this (in kJ mol^-1 nm^-1).
2238     real forceWarningThreshold_ = -1;
2239
2240     //! Whether the simulation will start afresh, or restart with/without appending.
2241     StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
2242
2243     //! The modules that comprise the functionality of mdrun.
2244     std::unique_ptr<MDModules> mdModules_;
2245
2246     //! Detected hardware.
2247     const gmx_hw_info_t* hwinfo_ = nullptr;
2248
2249     //! \brief Parallelism information.
2250     gmx_hw_opt_t hardwareOptions_;
2251
2252     //! filename options for simulation.
2253     ArrayRef<const t_filenm> filenames_;
2254
2255     /*! \brief Handle to output environment.
2256      *
2257      * \todo gmx_output_env_t needs lifetime management.
2258      */
2259     gmx_output_env_t* outputEnvironment_ = nullptr;
2260
2261     /*! \brief Non-owning handle to MD log file.
2262      *
2263      * \todo Context should own output facilities for client.
2264      * \todo Improve log file handle management.
2265      * \internal
2266      * Code managing the FILE* relies on the ability to set it to
2267      * nullptr to check whether the filehandle is valid.
2268      */
2269     t_fileio* logFileHandle_ = nullptr;
2270
2271     /*!
2272      * \brief Builder for simulation stop signal handler.
2273      */
2274     std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
2275
2276     /*!
2277      * \brief Sources for initial simulation state.
2278      *
2279      * See issue #3652 for near-term refinements to the SimulationInput interface.
2280      *
2281      * See issue #3379 for broader discussion on API aspects of simulation inputs and outputs.
2282      */
2283     SimulationInputHandle inputHolder_;
2284 };
2285
2286 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules,
2287                                                        compat::not_null<SimulationContext*> context) :
2288     mdModules_(std::move(mdModules))
2289 {
2290     libraryWorldCommunicator_ = context->libraryWorldCommunicator_;
2291     simulationCommunicator_   = context->simulationCommunicator_;
2292     multiSimulation_          = context->multiSimulation_.get();
2293 }
2294
2295 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
2296
2297 Mdrunner::BuilderImplementation&
2298 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions&    options,
2299                                                       const real             forceWarningThreshold,
2300                                                       const StartingBehavior startingBehavior)
2301 {
2302     mdrunOptions_          = options;
2303     forceWarningThreshold_ = forceWarningThreshold;
2304     startingBehavior_      = startingBehavior;
2305     return *this;
2306 }
2307
2308 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions& options)
2309 {
2310     domdecOptions_ = options;
2311 }
2312
2313 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
2314 {
2315     nstlist_ = nstlist;
2316 }
2317
2318 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters& params)
2319 {
2320     replicaExchangeParameters_ = params;
2321 }
2322
2323 Mdrunner Mdrunner::BuilderImplementation::build()
2324 {
2325     auto newRunner = Mdrunner(std::move(mdModules_));
2326
2327     newRunner.mdrunOptions     = mdrunOptions_;
2328     newRunner.pforce           = forceWarningThreshold_;
2329     newRunner.startingBehavior = startingBehavior_;
2330     newRunner.domdecOptions    = domdecOptions_;
2331
2332     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
2333     newRunner.hw_opt = hardwareOptions_;
2334
2335     // No invariant to check. This parameter exists to optionally override other behavior.
2336     newRunner.nstlist_cmdline = nstlist_;
2337
2338     newRunner.replExParams = replicaExchangeParameters_;
2339
2340     newRunner.filenames = filenames_;
2341
2342     newRunner.libraryWorldCommunicator = libraryWorldCommunicator_;
2343
2344     newRunner.simulationCommunicator = simulationCommunicator_;
2345
2346     // nullptr is a valid value for the multisim handle
2347     newRunner.ms = multiSimulation_;
2348
2349     if (hwinfo_)
2350     {
2351         newRunner.hwinfo_ = hwinfo_;
2352     }
2353     else
2354     {
2355         GMX_THROW(gmx::APIError(
2356                 "MdrunnerBuilder::addHardwareDetectionResult() is required before build()"));
2357     }
2358
2359     if (inputHolder_)
2360     {
2361         newRunner.inputHolder_ = std::move(inputHolder_);
2362     }
2363     else
2364     {
2365         GMX_THROW(gmx::APIError("MdrunnerBuilder::addInput() is required before build()."));
2366     }
2367
2368     // \todo Clarify ownership and lifetime management for gmx_output_env_t
2369     // \todo Update sanity checking when output environment has clearly specified invariants.
2370     // Initialization and default values for oenv are not well specified in the current version.
2371     if (outputEnvironment_)
2372     {
2373         newRunner.oenv = outputEnvironment_;
2374     }
2375     else
2376     {
2377         GMX_THROW(gmx::APIError(
2378                 "MdrunnerBuilder::addOutputEnvironment() is required before build()"));
2379     }
2380
2381     newRunner.logFileHandle = logFileHandle_;
2382
2383     if (nbpu_opt_)
2384     {
2385         newRunner.nbpu_opt = nbpu_opt_;
2386     }
2387     else
2388     {
2389         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
2390     }
2391
2392     if (pme_opt_ && pme_fft_opt_)
2393     {
2394         newRunner.pme_opt     = pme_opt_;
2395         newRunner.pme_fft_opt = pme_fft_opt_;
2396     }
2397     else
2398     {
2399         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
2400     }
2401
2402     if (bonded_opt_)
2403     {
2404         newRunner.bonded_opt = bonded_opt_;
2405     }
2406     else
2407     {
2408         GMX_THROW(gmx::APIError(
2409                 "MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
2410     }
2411
2412     if (update_opt_)
2413     {
2414         newRunner.update_opt = update_opt_;
2415     }
2416     else
2417     {
2418         GMX_THROW(gmx::APIError(
2419                 "MdrunnerBuilder::addUpdateTaskAssignment() is required before build()"));
2420     }
2421
2422
2423     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
2424
2425     if (stopHandlerBuilder_)
2426     {
2427         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
2428     }
2429     else
2430     {
2431         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
2432     }
2433
2434     return newRunner;
2435 }
2436
2437 void Mdrunner::BuilderImplementation::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo)
2438 {
2439     hwinfo_ = hwinfo;
2440 }
2441
2442 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
2443 {
2444     nbpu_opt_ = nbpu_opt;
2445 }
2446
2447 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, const char* pme_fft_opt)
2448 {
2449     pme_opt_     = pme_opt;
2450     pme_fft_opt_ = pme_fft_opt;
2451 }
2452
2453 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
2454 {
2455     bonded_opt_ = bonded_opt;
2456 }
2457
2458 void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
2459 {
2460     update_opt_ = update_opt;
2461 }
2462
2463 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
2464 {
2465     hardwareOptions_ = hardwareOptions;
2466 }
2467
2468 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
2469 {
2470     filenames_ = filenames;
2471 }
2472
2473 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2474 {
2475     outputEnvironment_ = outputEnvironment;
2476 }
2477
2478 void Mdrunner::BuilderImplementation::addLogFile(t_fileio* logFileHandle)
2479 {
2480     logFileHandle_ = logFileHandle;
2481 }
2482
2483 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2484 {
2485     stopHandlerBuilder_ = std::move(builder);
2486 }
2487
2488 void Mdrunner::BuilderImplementation::addInput(SimulationInputHandle inputHolder)
2489 {
2490     inputHolder_ = std::move(inputHolder);
2491 }
2492
2493 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
2494                                  compat::not_null<SimulationContext*> context) :
2495     impl_{ std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context) }
2496 {
2497 }
2498
2499 MdrunnerBuilder::~MdrunnerBuilder() = default;
2500
2501 MdrunnerBuilder& MdrunnerBuilder::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo)
2502 {
2503     impl_->addHardwareDetectionResult(hwinfo);
2504     return *this;
2505 }
2506
2507 MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions&    options,
2508                                                       real                   forceWarningThreshold,
2509                                                       const StartingBehavior startingBehavior)
2510 {
2511     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
2512     return *this;
2513 }
2514
2515 MdrunnerBuilder& MdrunnerBuilder::addDomainDecomposition(const DomdecOptions& options)
2516 {
2517     impl_->addDomdec(options);
2518     return *this;
2519 }
2520
2521 MdrunnerBuilder& MdrunnerBuilder::addNeighborList(int nstlist)
2522 {
2523     impl_->addVerletList(nstlist);
2524     return *this;
2525 }
2526
2527 MdrunnerBuilder& MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters& params)
2528 {
2529     impl_->addReplicaExchange(params);
2530     return *this;
2531 }
2532
2533 MdrunnerBuilder& MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
2534 {
2535     impl_->addNonBonded(nbpu_opt);
2536     return *this;
2537 }
2538
2539 MdrunnerBuilder& MdrunnerBuilder::addElectrostatics(const char* pme_opt, const char* pme_fft_opt)
2540 {
2541     // The builder method may become more general in the future, but in this version,
2542     // parameters for PME electrostatics are both required and the only parameters
2543     // available.
2544     if (pme_opt && pme_fft_opt)
2545     {
2546         impl_->addPME(pme_opt, pme_fft_opt);
2547     }
2548     else
2549     {
2550         GMX_THROW(
2551                 gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
2552     }
2553     return *this;
2554 }
2555
2556 MdrunnerBuilder& MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
2557 {
2558     impl_->addBondedTaskAssignment(bonded_opt);
2559     return *this;
2560 }
2561
2562 MdrunnerBuilder& MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
2563 {
2564     impl_->addUpdateTaskAssignment(update_opt);
2565     return *this;
2566 }
2567
2568 Mdrunner MdrunnerBuilder::build()
2569 {
2570     return impl_->build();
2571 }
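// For orientation, a minimal sketch of how client code (e.g. the mdrun command-line front end) is
// expected to drive this builder before calling build(); illustrative only, and the variables
// hwinfo, options, oenv, logFileHandle, simulationInput, etc. are assumed to have been prepared
// elsewhere:
//
//     builder.addHardwareDetectionResult(hwinfo)
//             .addSimulationMethod(options, forceWarningThreshold, startingBehavior)
//             .addNonBonded(nbpu_opt)
//             .addElectrostatics(pme_opt, pme_fft_opt)
//             .addBondedTaskAssignment(bonded_opt)
//             .addUpdateTaskAssignment(update_opt)
//             .addHardwareOptions(hw_opt)
//             .addFilenames(filenames)
//             .addOutputEnvironment(oenv)
//             .addLogFile(logFileHandle)
//             .addInput(std::move(simulationInput));
//     Mdrunner runner = builder.build();
//     runner.mdrunner();
//
// Omitting any of the required pieces (hardware detection result, non-bonded and PME task
// assignments, bonded and update task assignments, output environment, or input) makes build()
// throw an APIError, as enforced by BuilderImplementation::build() above.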
2572
2573 MdrunnerBuilder& MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
2574 {
2575     impl_->addHardwareOptions(hardwareOptions);
2576     return *this;
2577 }
2578
2579 MdrunnerBuilder& MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2580 {
2581     impl_->addFilenames(filenames);
2582     return *this;
2583 }
2584
2585 MdrunnerBuilder& MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2586 {
2587     impl_->addOutputEnvironment(outputEnvironment);
2588     return *this;
2589 }
2590
2591 MdrunnerBuilder& MdrunnerBuilder::addLogFile(t_fileio* logFileHandle)
2592 {
2593     impl_->addLogFile(logFileHandle);
2594     return *this;
2595 }
2596
2597 MdrunnerBuilder& MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2598 {
2599     impl_->addStopHandlerBuilder(std::move(builder));
2600     return *this;
2601 }
2602
2603 MdrunnerBuilder& MdrunnerBuilder::addInput(SimulationInputHandle input)
2604 {
2605     impl_->addInput(std::move(input));
2606     return *this;
2607 }
2608
2609 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder&&) noexcept = default;
2610
2611 MdrunnerBuilder& MdrunnerBuilder::operator=(MdrunnerBuilder&&) noexcept = default;
2612
2613 } // namespace gmx