src/gromacs/mdrun/runner.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the MD runner routine calling all integrators.
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include "runner.h"
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cinttypes>
  52 #include <csignal>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57 #include <memory>
  58
  59 #include "gromacs/commandline/filenm.h"
  60 #include "gromacs/domdec/builder.h"
  61 #include "gromacs/domdec/domdec.h"
  62 #include "gromacs/domdec/domdec_struct.h"
  63 #include "gromacs/domdec/gpuhaloexchange.h"
  64 #include "gromacs/domdec/localatomsetmanager.h"
  65 #include "gromacs/domdec/partition.h"
  66 #include "gromacs/ewald/ewald_utils.h"
  67 #include "gromacs/ewald/pme.h"
  68 #include "gromacs/ewald/pme_gpu_program.h"
  69 #include "gromacs/fileio/checkpoint.h"
  70 #include "gromacs/fileio/gmxfio.h"
  71 #include "gromacs/fileio/oenv.h"
  72 #include "gromacs/fileio/tpxio.h"
  73 #include "gromacs/gmxlib/network.h"
  74 #include "gromacs/gmxlib/nrnb.h"
  75 #include "gromacs/gpu_utils/gpu_utils.h"
  76 #include "gromacs/hardware/cpuinfo.h"
  77 #include "gromacs/hardware/detecthardware.h"
  78 #include "gromacs/hardware/printhardware.h"
  79 #include "gromacs/imd/imd.h"
  80 #include "gromacs/listed_forces/disre.h"
  81 #include "gromacs/listed_forces/gpubonded.h"
  82 #include "gromacs/listed_forces/orires.h"
  83 #include "gromacs/math/functions.h"
  84 #include "gromacs/math/utilities.h"
  85 #include "gromacs/math/vec.h"
  86 #include "gromacs/mdlib/boxdeformation.h"
  87 #include "gromacs/mdlib/broadcaststructs.h"
  88 #include "gromacs/mdlib/calc_verletbuf.h"
  89 #include "gromacs/mdlib/dispersioncorrection.h"
  90 #include "gromacs/mdlib/enerdata_utils.h"
  91 #include "gromacs/mdlib/force.h"
  92 #include "gromacs/mdlib/forcerec.h"
  93 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  94 #include "gromacs/mdlib/makeconstraints.h"
  95 #include "gromacs/mdlib/md_support.h"
  96 #include "gromacs/mdlib/mdatoms.h"
  97 #include "gromacs/mdlib/membed.h"
  98 #include "gromacs/mdlib/qmmm.h"
  99 #include "gromacs/mdlib/sighandler.h"
 100 #include "gromacs/mdlib/stophandler.h"
 101 #include "gromacs/mdrun/mdmodules.h"
 102 #include "gromacs/mdrun/simulationcontext.h"
 103 #include "gromacs/mdrunutility/handlerestart.h"
 104 #include "gromacs/mdrunutility/logging.h"
 105 #include "gromacs/mdrunutility/multisim.h"
 106 #include "gromacs/mdrunutility/printtime.h"
 107 #include "gromacs/mdrunutility/threadaffinity.h"
 108 #include "gromacs/mdtypes/commrec.h"
 109 #include "gromacs/mdtypes/enerdata.h"
 110 #include "gromacs/mdtypes/fcdata.h"
 111 #include "gromacs/mdtypes/group.h"
 112 #include "gromacs/mdtypes/inputrec.h"
 113 #include "gromacs/mdtypes/md_enums.h"
 114 #include "gromacs/mdtypes/mdrunoptions.h"
 115 #include "gromacs/mdtypes/observableshistory.h"
 116 #include "gromacs/mdtypes/simulation_workload.h"
 117 #include "gromacs/mdtypes/state.h"
 118 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 119 #include "gromacs/nbnxm/nbnxm.h"
 120 #include "gromacs/nbnxm/pairlist_tuning.h"
 121 #include "gromacs/pbcutil/pbc.h"
 122 #include "gromacs/pulling/output.h"
 123 #include "gromacs/pulling/pull.h"
 124 #include "gromacs/pulling/pull_rotation.h"
 125 #include "gromacs/restraint/manager.h"
 126 #include "gromacs/restraint/restraintmdmodule.h"
 127 #include "gromacs/restraint/restraintpotential.h"
 128 #include "gromacs/swap/swapcoords.h"
 129 #include "gromacs/taskassignment/decidegpuusage.h"
 130 #include "gromacs/taskassignment/resourcedivision.h"
 131 #include "gromacs/taskassignment/taskassignment.h"
 132 #include "gromacs/taskassignment/usergpuids.h"
 133 #include "gromacs/timing/gpu_timing.h"
 134 #include "gromacs/timing/wallcycle.h"
 135 #include "gromacs/timing/wallcyclereporting.h"
 136 #include "gromacs/topology/mtop_util.h"
 137 #include "gromacs/trajectory/trajectoryframe.h"
 138 #include "gromacs/utility/basenetwork.h"
 139 #include "gromacs/utility/cstringutil.h"
 140 #include "gromacs/utility/exceptions.h"
 141 #include "gromacs/utility/fatalerror.h"
 142 #include "gromacs/utility/filestream.h"
 143 #include "gromacs/utility/gmxassert.h"
 144 #include "gromacs/utility/gmxmpi.h"
 145 #include "gromacs/utility/keyvaluetree.h"
 146 #include "gromacs/utility/logger.h"
 147 #include "gromacs/utility/loggerbuilder.h"
 148 #include "gromacs/utility/mdmodulenotification.h"
 149 #include "gromacs/utility/physicalnodecommunicator.h"
 150 #include "gromacs/utility/pleasecite.h"
 151 #include "gromacs/utility/programcontext.h"
 152 #include "gromacs/utility/smalloc.h"
 153 #include "gromacs/utility/stringutil.h"
 154
 155 #include "isimulator.h"
 156 #include "replicaexchange.h"
 157 #include "simulatorbuilder.h"
 158
 159 #if GMX_FAHCORE
 160 #include "corewrap.h"
 161 #endif
 162
 163 namespace gmx
 164 {
 165
 166 /*! \brief environment variable to enable GPU P2P communication */
 167 static const bool c_enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr)
 168     && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
 169
 170 /*! \brief environment variable to enable GPU buffer operations */
 171 static const bool c_enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 172
 173 /*! \brief Manage any development feature flag variables encountered
 174  *
 175  * The use of dev features indicated by environment variables is
 176  * logged in order to ensure that runs with such features enabled can
 177  * be identified from their log and standard output. Any cross
 178  * dependencies are also checked, and if unsatisfied, a fatal error
 179  * issued.
 180  *
 181  * \param[in]  mdlog        Logger object.
 182  */
 183 static void manageDevelopmentFeatures(const gmx::MDLogger &mdlog)
 184 {
 185     const bool enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 186     const bool useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
 187     const bool enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
 188
 189     if (enableGpuBufOps)
 190     {
 191         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 192                 "NOTE: This run uses the 'GPU buffer ops' feature, enabled by the GMX_USE_GPU_BUFFER_OPS environment variable.");
 193     }
 194
 195     if (enableGpuHaloExchange)
 196     {
 197         if (!enableGpuBufOps)
 198         {
 199             gmx_fatal(FARGS, "Cannot enable GPU halo exchange without GPU buffer operations, set GMX_USE_GPU_BUFFER_OPS=1\n");
 200         }
 201     }
 202
 203     if (useGpuUpdateConstrain)
 204     {
 205         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 206                 "NOTE: This run uses the 'GPU update/constraints' feature, enabled by the GMX_UPDATE_CONSTRAIN_GPU environment variable.");
 207     }
 208
 209 }
 210
 211 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
 212  *
 213  * Used to ensure that the master thread does not modify mdrunner during copy
 214  * on the spawned threads. */
 215 static void threadMpiMdrunnerAccessBarrier()
 216 {
 217 #if GMX_THREAD_MPI
 218     MPI_Barrier(MPI_COMM_WORLD);
 219 #endif
 220 }
 221
 222 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 223 {
 224     auto newRunner = Mdrunner(std::make_unique<MDModules>());
 225
 226     // All runners in the same process share a restraint manager resource because it is
 227     // part of the interface to the client code, which is associated only with the
 228     // original thread. Handles to the same resources can be obtained by copy.
 229     {
 230         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
 231     }
 232
 233     // Copy members of master runner.
 234     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 235     // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
 236     newRunner.hw_opt    = hw_opt;
 237     newRunner.filenames = filenames;
 238
 239     newRunner.oenv                = oenv;
 240     newRunner.mdrunOptions        = mdrunOptions;
 241     newRunner.domdecOptions       = domdecOptions;
 242     newRunner.nbpu_opt            = nbpu_opt;
 243     newRunner.pme_opt             = pme_opt;
 244     newRunner.pme_fft_opt         = pme_fft_opt;
 245     newRunner.bonded_opt          = bonded_opt;
 246     newRunner.update_opt          = update_opt;
 247     newRunner.nstlist_cmdline     = nstlist_cmdline;
 248     newRunner.replExParams        = replExParams;
 249     newRunner.pforce              = pforce;
 250     // Give the spawned thread the newly created valid communicator
 251     // for the simulation.
 252     newRunner.communicator        = MPI_COMM_WORLD;
 253     newRunner.ms                  = ms;
 254     newRunner.startingBehavior    = startingBehavior;
 255     newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 256
 257     threadMpiMdrunnerAccessBarrier();
 258
 259     return newRunner;
 260 }
 261
 262 /*! \brief The callback used for running on spawned threads.
 263  *
 264  * Obtains the pointer to the master mdrunner object from the one
 265  * argument permitted to the thread-launch API call, copies it to make
 266  * a new runner for this thread, reinitializes necessary data, and
 267  * proceeds to the simulation. */
 268 static void mdrunner_start_fn(const void *arg)
 269 {
 270     try
 271     {
 272         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
 273         /* copy the arg list to make sure that it's thread-local. This
 274            doesn't copy pointed-to items, of course; fnm, cr and fplog
 275            are reset in the call below, all others should be const. */
 276         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
 277         mdrunner.mdrunner();
 278     }
 279     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 280 }
 281
 282
 283 void Mdrunner::spawnThreads(int numThreadsToLaunch)
 284 {
 285 #if GMX_THREAD_MPI
 286     /* now spawn new threads that start mdrunner_start_fn(), while
 287        the main thread returns. Thread affinity is handled later. */
 288     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
 289                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
 290     {
 291         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
 292     }
 293
 294     // Give the master thread the newly created valid communicator for
 295     // the simulation.
 296     communicator = MPI_COMM_WORLD;
 297     threadMpiMdrunnerAccessBarrier();
 298 #else
 299     GMX_UNUSED_VALUE(numThreadsToLaunch);
 300     GMX_UNUSED_VALUE(mdrunner_start_fn);
 301 #endif
 302 }
 303
 304 }  // namespace gmx
 305
 306 /*! \brief Initialize variables for Verlet scheme simulation */
 307 static void prepare_verlet_scheme(FILE                           *fplog,
 308                                   t_commrec                      *cr,
 309                                   t_inputrec                     *ir,
 310                                   int                             nstlist_cmdline,
 311                                   const gmx_mtop_t               *mtop,
 312                                   const matrix                    box,
 313                                   bool                            makeGpuPairList,
 314                                   const gmx::CpuInfo             &cpuinfo)
 315 {
 316     /* For NVE simulations, we will retain the initial list buffer */
 317     if (EI_DYNAMICS(ir->eI) &&
 318         ir->verletbuf_tol > 0 &&
 319         !(EI_MD(ir->eI) && ir->etc == etcNO))
 320     {
 321         /* Update the Verlet buffer size for the current run setup */
 322
 323         /* Here we assume SIMD-enabled kernels are being used. But as currently
 324          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 325          * and 4x2 gives a larger buffer than 4x4, this is ok.
 326          */
 327         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
 328         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 329
 330         const real         rlist_new =
 331             calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
 332
 333         if (rlist_new != ir->rlist)
 334         {
 335             if (fplog != nullptr)
 336             {
 337                 fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 338                         ir->rlist, rlist_new,
 339                         listSetup.cluster_size_i, listSetup.cluster_size_j);
 340             }
 341             ir->rlist     = rlist_new;
 342         }
 343     }
 344
 345     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 346     {
 347         gmx_fatal(FARGS, "Can not set nstlist without %s",
 348                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 349     }
 350
 351     if (EI_DYNAMICS(ir->eI))
 352     {
 353         /* Set or try nstlist values */
 354         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
 355     }
 356 }
 357
 358 /*! \brief Override the nslist value in inputrec
 359  *
 360  * with value passed on the command line (if any)
 361  */
 362 static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
 363                                     int64_t              nsteps_cmdline,
 364                                     t_inputrec          *ir)
 365 {
 366     assert(ir);
 367
 368     /* override with anything else than the default -2 */
 369     if (nsteps_cmdline > -2)
 370     {
 371         char sbuf_steps[STEPSTRSIZE];
 372         char sbuf_msg[STRLEN];
 373
 374         ir->nsteps = nsteps_cmdline;
 375         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
 376         {
 377             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
 378                     gmx_step_str(nsteps_cmdline, sbuf_steps),
 379                     fabs(nsteps_cmdline*ir->delta_t));
 380         }
 381         else
 382         {
 383             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
 384                     gmx_step_str(nsteps_cmdline, sbuf_steps));
 385         }
 386
 387         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
 388     }
 389     else if (nsteps_cmdline < -2)
 390     {
 391         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64,
 392                   nsteps_cmdline);
 393     }
 394     /* Do nothing if nsteps_cmdline == -2 */
 395 }
 396
 397 namespace gmx
 398 {
 399
 400 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
 401  *
 402  * If not, and if a warning may be issued, logs a warning about
 403  * falling back to CPU code. With thread-MPI, only the first
 404  * call to this function should have \c issueWarning true. */
 405 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
 406                                                const t_inputrec &ir,
 407                                                bool              issueWarning)
 408 {
 409     bool        gpuIsUseful = true;
 410     std::string warning;
 411
 412     if (ir.opts.ngener - ir.nwall > 1)
 413     {
 414         /* The GPU code does not support more than one energy group.
 415          * If the user requested GPUs explicitly, a fatal error is given later.
 416          */
 417         gpuIsUseful = false;
 418         warning     =
 419             "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
 420             "For better performance, run on the GPU without energy groups and then do "
 421             "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
 422     }
 423
 424     if (EI_TPI(ir.eI))
 425     {
 426         gpuIsUseful = false;
 427         warning     = "TPI is not implemented for GPUs.";
 428     }
 429
 430     if (!gpuIsUseful && issueWarning)
 431     {
 432         GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
 433     }
 434
 435     return gpuIsUseful;
 436 }
 437
 438 //! Initializes the logger for mdrun.
 439 static gmx::LoggerOwner buildLogger(FILE      *fplog,
 440                                     const bool isSimulationMasterRank)
 441 {
 442     gmx::LoggerBuilder builder;
 443     if (fplog != nullptr)
 444     {
 445         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
 446     }
 447     if (isSimulationMasterRank)
 448     {
 449         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning,
 450                                 &gmx::TextOutputFile::standardError());
 451     }
 452     return builder.build();
 453 }
 454
 455 //! Make a TaskTarget from an mdrun argument string.
 456 static TaskTarget findTaskTarget(const char *optionString)
 457 {
 458     TaskTarget returnValue = TaskTarget::Auto;
 459
 460     if (strncmp(optionString, "auto", 3) == 0)
 461     {
 462         returnValue = TaskTarget::Auto;
 463     }
 464     else if (strncmp(optionString, "cpu", 3) == 0)
 465     {
 466         returnValue = TaskTarget::Cpu;
 467     }
 468     else if (strncmp(optionString, "gpu", 3) == 0)
 469     {
 470         returnValue = TaskTarget::Gpu;
 471     }
 472     else
 473     {
 474         GMX_ASSERT(false, "Option string should have been checked for sanity already");
 475     }
 476
 477     return returnValue;
 478 }
 479
 480 //! Finish run, aggregate data to print performance info.
 481 static void finish_run(FILE *fplog,
 482                        const gmx::MDLogger &mdlog,
 483                        const t_commrec *cr,
 484                        const t_inputrec *inputrec,
 485                        t_nrnb nrnb[], gmx_wallcycle_t wcycle,
 486                        gmx_walltime_accounting_t walltime_accounting,
 487                        nonbonded_verlet_t *nbv,
 488                        const gmx_pme_t *pme,
 489                        gmx_bool bWriteStat)
 490 {
 491     double  delta_t  = 0;
 492     double  nbfs     = 0, mflop = 0;
 493     double  elapsed_time,
 494             elapsed_time_over_all_ranks,
 495             elapsed_time_over_all_threads,
 496             elapsed_time_over_all_threads_over_all_ranks;
 497     /* Control whether it is valid to print a report. Only the
 498        simulation master may print, but it should not do so if the run
 499        terminated e.g. before a scheduled reset step. This is
 500        complicated by the fact that PME ranks are unaware of the
 501        reason why they were sent a pmerecvqxFINISH. To avoid
 502        communication deadlocks, we always do the communication for the
 503        report, even if we've decided not to write the report, because
 504        how long it takes to finish the run is not important when we've
 505        decided not to report on the simulation performance.
 506
 507        Further, we only report performance for dynamical integrators,
 508        because those are the only ones for which we plan to
 509        consider doing any optimizations. */
 510     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
 511
 512     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
 513     {
 514         GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
 515         printReport = false;
 516     }
 517
 518     t_nrnb                  *nrnb_tot;
 519     std::unique_ptr<t_nrnb>  nrnbTotalStorage;
 520     if (cr->nnodes > 1)
 521     {
 522         nrnbTotalStorage = std::make_unique<t_nrnb>();
 523         nrnb_tot         = nrnbTotalStorage.get();
 524 #if GMX_MPI
 525         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
 526                       cr->mpi_comm_mysim);
 527 #endif
 528     }
 529     else
 530     {
 531         nrnb_tot = nrnb;
 532     }
 533
 534     elapsed_time                  = walltime_accounting_get_time_since_reset(walltime_accounting);
 535     elapsed_time_over_all_threads = walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
 536     if (cr->nnodes > 1)
 537     {
 538 #if GMX_MPI
 539         /* reduce elapsed_time over all MPI ranks in the current simulation */
 540         MPI_Allreduce(&elapsed_time,
 541                       &elapsed_time_over_all_ranks,
 542                       1, MPI_DOUBLE, MPI_SUM,
 543                       cr->mpi_comm_mysim);
 544         elapsed_time_over_all_ranks /= cr->nnodes;
 545         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
 546          * current simulation. */
 547         MPI_Allreduce(&elapsed_time_over_all_threads,
 548                       &elapsed_time_over_all_threads_over_all_ranks,
 549                       1, MPI_DOUBLE, MPI_SUM,
 550                       cr->mpi_comm_mysim);
 551 #endif
 552     }
 553     else
 554     {
 555         elapsed_time_over_all_ranks                  = elapsed_time;
 556         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
 557     }
 558
 559     if (printReport)
 560     {
 561         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
 562     }
 563
 564     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
 565     {
 566         print_dd_statistics(cr, inputrec, fplog);
 567     }
 568
 569     /* TODO Move the responsibility for any scaling by thread counts
 570      * to the code that handled the thread region, so that there's a
 571      * mechanism to keep cycle counting working during the transition
 572      * to task parallelism. */
 573     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 574     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
 575     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
 576     auto cycle_sum(wallcycle_sum(cr, wcycle));
 577
 578     if (printReport)
 579     {
 580         auto                    nbnxn_gpu_timings = (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
 581         gmx_wallclock_gpu_pme_t pme_gpu_timings   = {};
 582
 583         if (pme_gpu_task_enabled(pme))
 584         {
 585             pme_gpu_get_timings(pme, &pme_gpu_timings);
 586         }
 587         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
 588                         elapsed_time_over_all_ranks,
 589                         wcycle, cycle_sum,
 590                         nbnxn_gpu_timings,
 591                         &pme_gpu_timings);
 592
 593         if (EI_DYNAMICS(inputrec->eI))
 594         {
 595             delta_t = inputrec->delta_t;
 596         }
 597
 598         if (fplog)
 599         {
 600             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
 601                        elapsed_time_over_all_ranks,
 602                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 603                        delta_t, nbfs, mflop);
 604         }
 605         if (bWriteStat)
 606         {
 607             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
 608                        elapsed_time_over_all_ranks,
 609                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 610                        delta_t, nbfs, mflop);
 611         }
 612     }
 613 }
 614
 615 int Mdrunner::mdrunner()
 616 {
 617     matrix                    box;
 618     t_forcerec               *fr               = nullptr;
 619     t_fcdata                 *fcd              = nullptr;
 620     real                      ewaldcoeff_q     = 0;
 621     real                      ewaldcoeff_lj    = 0;
 622     int                       nChargePerturbed = -1, nTypePerturbed = 0;
 623     gmx_wallcycle_t           wcycle;
 624     gmx_walltime_accounting_t walltime_accounting = nullptr;
 625     gmx_membed_t *            membed              = nullptr;
 626     gmx_hw_info_t            *hwinfo              = nullptr;
 627
 628     /* CAUTION: threads may be started later on in this function, so
 629        cr doesn't reflect the final parallel state right now */
 630     gmx_mtop_t                      mtop;
 631
 632     bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data());
 633     bool doRerun  = mdrunOptions.rerun;
 634
 635     // Handle task-assignment related user options.
 636     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
 637                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
 638
 639     std::vector<int> userGpuTaskAssignment;
 640     try
 641     {
 642         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
 643     }
 644     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 645     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
 646     auto       pmeTarget       = findTaskTarget(pme_opt);
 647     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
 648     auto       bondedTarget    = findTaskTarget(bonded_opt);
 649     auto       updateTarget    = findTaskTarget(update_opt);
 650     PmeRunMode pmeRunMode      = PmeRunMode::None;
 651
 652     FILE      *fplog = nullptr;
 653     // If we are appending, we don't write log output because we need
 654     // to check that the old log file matches what the checkpoint file
 655     // expects. Otherwise, we should start to write log output now if
 656     // there is a file ready for it.
 657     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
 658     {
 659         fplog = gmx_fio_getfp(logFileHandle);
 660     }
 661     const bool       isSimulationMasterRank = findIsSimulationMasterRank(ms, communicator);
 662     gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
 663     gmx::MDLogger    mdlog(logOwner.logger());
 664
 665     // report any development features that may be enabled by environment variables
 666     manageDevelopmentFeatures(mdlog);
 667
 668     // TODO The thread-MPI master rank makes a working
 669     // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
 670     // after the threads have been launched. This works because no use
 671     // is made of that communicator until after the execution paths
 672     // have rejoined. But it is likely that we can improve the way
 673     // this is expressed, e.g. by expressly running detection only the
 674     // master rank for thread-MPI, rather than relying on the mutex
 675     // and reference count.
 676     PhysicalNodeCommunicator physicalNodeComm(communicator, gmx_physicalnode_id_hash());
 677     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 678
 679     gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
 680
 681     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
 682
 683     // Print citation requests after all software/hardware printing
 684     pleaseCiteGromacs(fplog);
 685
 686     // TODO Replace this by unique_ptr once t_inputrec is C++
 687     t_inputrec               inputrecInstance;
 688     t_inputrec              *inputrec = nullptr;
 689     std::unique_ptr<t_state> globalState;
 690
 691     auto                     partialDeserializedTpr = std::make_unique<PartialDeserializedTprFile>();
 692
 693     if (isSimulationMasterRank)
 694     {
 695         /* Only the master rank has the global state */
 696         globalState = std::make_unique<t_state>();
 697
 698         /* Read (nearly) all data required for the simulation
 699          * and keep the partly serialized tpr contents to send to other ranks later
 700          */
 701         *partialDeserializedTpr = read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), &inputrecInstance, globalState.get(), &mtop);
 702         inputrec                = &inputrecInstance;
 703     }
 704
 705     /* Check and update the hardware options for internal consistency */
 706     checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks, inputrec);
 707
 708     if (GMX_THREAD_MPI && isSimulationMasterRank)
 709     {
 710         bool useGpuForNonbonded = false;
 711         bool useGpuForPme       = false;
 712         try
 713         {
 714             GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
 715
 716             // If the user specified the number of ranks, then we must
 717             // respect that, but in default mode, we need to allow for
 718             // the number of GPUs to choose the number of ranks.
 719             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 720             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
 721                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 722                     canUseGpuForNonbonded,
 723                     gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
 724                     hw_opt.nthreads_tmpi);
 725             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
 726                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
 727                     *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 728
 729         }
 730         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 731
 732         /* Determine how many thread-MPI ranks to start.
 733          *
 734          * TODO Over-writing the user-supplied value here does
 735          * prevent any possible subsequent checks from working
 736          * correctly. */
 737         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
 738                                                 &hw_opt,
 739                                                 gpuIdsToUse,
 740                                                 useGpuForNonbonded,
 741                                                 useGpuForPme,
 742                                                 inputrec, &mtop,
 743                                                 mdlog,
 744                                                 doMembed);
 745
 746         // Now start the threads for thread MPI.
 747         spawnThreads(hw_opt.nthreads_tmpi);
 748         // The spawned threads enter mdrunner() and execution of
 749         // master and spawned threads joins at the end of this block.
 750         physicalNodeComm = PhysicalNodeCommunicator(communicator, gmx_physicalnode_id_hash());
 751     }
 752
 753     GMX_RELEASE_ASSERT(communicator == MPI_COMM_WORLD, "Must have valid world communicator");
 754     CommrecHandle crHandle = init_commrec(communicator, ms);
 755     t_commrec    *cr       = crHandle.get();
 756     GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
 757
 758     if (PAR(cr))
 759     {
 760         /* now broadcast everything to the non-master nodes/threads: */
 761         if (!isSimulationMasterRank)
 762         {
 763             inputrec = &inputrecInstance;
 764         }
 765         init_parallel(cr, inputrec, &mtop, partialDeserializedTpr.get());
 766     }
 767     GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
 768     partialDeserializedTpr.reset(nullptr);
 769
 770     // Now the number of ranks is known to all ranks, and each knows
 771     // the inputrec read by the master rank. The ranks can now all run
 772     // the task-deciding functions and will agree on the result
 773     // without needing to communicate.
 774     //
 775     // TODO Should we do the communication in debug mode to support
 776     // having an assertion?
 777     //
 778     // Note that these variables describe only their own node.
 779     //
 780     // Note that when bonded interactions run on a GPU they always run
 781     // alongside a nonbonded task, so do not influence task assignment
 782     // even though they affect the force calculation workload.
 783     bool useGpuForNonbonded     = false;
 784     bool useGpuForPme           = false;
 785     bool useGpuForBonded        = false;
 786     bool useGpuForUpdate        = false;
 787     bool gpusWereDetected       = hwinfo->ngpu_compatible_tot > 0;
 788     try
 789     {
 790         // It's possible that there are different numbers of GPUs on
 791         // different nodes, which is the user's responsibility to
 792         // handle. If unsuitable, we will notice that during task
 793         // assignment.
 794         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 795         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
 796                                                                 emulateGpuNonbonded,
 797                                                                 canUseGpuForNonbonded,
 798                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI),
 799                                                                 gpusWereDetected);
 800         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
 801                                                     *hwinfo, *inputrec, mtop,
 802                                                     cr->nnodes, domdecOptions.numPmeRanks,
 803                                                     gpusWereDetected);
 804         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
 805         useGpuForBonded =
 806             decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
 807                                             bondedTarget, canUseGpuForBonded,
 808                                             EVDW_PME(inputrec->vdwtype),
 809                                             EEL_PME_EWALD(inputrec->coulombtype),
 810                                             domdecOptions.numPmeRanks, gpusWereDetected);
 811
 812         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
 813         if (pmeRunMode == PmeRunMode::GPU)
 814         {
 815             if (pmeFftTarget == TaskTarget::Cpu)
 816             {
 817                 pmeRunMode = PmeRunMode::Mixed;
 818             }
 819         }
 820         else if (pmeFftTarget == TaskTarget::Gpu)
 821         {
 822             gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft.");
 823         }
 824     }
 825     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 826
 827     // Build restraints.
 828     // TODO: hide restraint implementation details from Mdrunner.
 829     // There is nothing unique about restraints at this point as far as the
 830     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
 831     // factory functions from the SimulationContext on which to call mdModules_->add().
 832     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
 833     for (auto && restraint : restraintManager_->getRestraints())
 834     {
 835         auto module = RestraintMDModule::create(restraint,
 836                                                 restraint->sites());
 837         mdModules_->add(std::move(module));
 838     }
 839
 840     // TODO: Error handling
 841     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 842     const auto &mdModulesNotifier = mdModules_->notifier().notifier_;
 843
 844     if (inputrec->internalParameters != nullptr)
 845     {
 846         mdModulesNotifier.notify(*inputrec->internalParameters);
 847     }
 848
 849     if (fplog != nullptr)
 850     {
 851         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 852         fprintf(fplog, "\n");
 853     }
 854
 855     if (SIMMASTER(cr))
 856     {
 857         /* In rerun, set velocities to zero if present */
 858         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
 859         {
 860             // rerun does not use velocities
 861             GMX_LOG(mdlog.info).asParagraph().appendText(
 862                     "Rerun trajectory contains velocities. Rerun does only evaluate "
 863                     "potential energy and forces. The velocities will be ignored.");
 864             for (int i = 0; i < globalState->natoms; i++)
 865             {
 866                 clear_rvec(globalState->v[i]);
 867             }
 868             globalState->flags &= ~(1 << estV);
 869         }
 870
 871         /* now make sure the state is initialized and propagated */
 872         set_state_entries(globalState.get(), inputrec);
 873     }
 874
 875     /* NM and TPI parallelize over force/energy calculations, not atoms,
 876      * so we need to initialize and broadcast the global state.
 877      */
 878     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
 879     {
 880         if (!MASTER(cr))
 881         {
 882             globalState = std::make_unique<t_state>();
 883         }
 884         broadcastStateWithoutDynamics(cr, globalState.get());
 885     }
 886
 887     /* A parallel command line option consistency check that we can
 888        only do after any threads have started. */
 889     if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 ||
 890                      domdecOptions.numCells[YY] > 1 ||
 891                      domdecOptions.numCells[ZZ] > 1 ||
 892                      domdecOptions.numPmeRanks > 0))
 893     {
 894         gmx_fatal(FARGS,
 895                   "The -dd or -npme option request a parallel simulation, "
 896 #if !GMX_MPI
 897                   "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
 898 #elif GMX_THREAD_MPI
 899                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
 900 #else
 901                   "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
 902 #endif
 903     }
 904
 905     if (doRerun &&
 906         (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 907     {
 908         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 909     }
 910
 911     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
 912     {
 913         if (domdecOptions.numPmeRanks > 0)
 914         {
 915             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
 916                                  "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
 917         }
 918
 919         domdecOptions.numPmeRanks = 0;
 920     }
 921
 922     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
 923     {
 924         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
 925          * improve performance with many threads per GPU, since our OpenMP
 926          * scaling is bad, but it's difficult to automate the setup.
 927          */
 928         domdecOptions.numPmeRanks = 0;
 929     }
 930     if (useGpuForPme)
 931     {
 932         if (domdecOptions.numPmeRanks < 0)
 933         {
 934             domdecOptions.numPmeRanks = 0;
 935             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
 936         }
 937         else
 938         {
 939             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
 940         }
 941     }
 942
 943 #if GMX_FAHCORE
 944     if (MASTER(cr))
 945     {
 946         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
 947     }
 948 #endif
 949
 950     /* NMR restraints must be initialized before load_checkpoint,
 951      * since with time averaging the history is added to t_state.
 952      * For proper consistency check we therefore need to extend
 953      * t_state here.
 954      * So the PME-only nodes (if present) will also initialize
 955      * the distance restraints.
 956      */
 957     snew(fcd, 1);
 958
 959     /* This needs to be called before read_checkpoint to extend the state */
 960     init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
 961
 962     init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
 963
 964     auto                 deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
 965
 966     ObservablesHistory   observablesHistory = {};
 967
 968     if (startingBehavior != StartingBehavior::NewSimulation)
 969     {
 970         /* Check if checkpoint file exists before doing continuation.
 971          * This way we can use identical input options for the first and subsequent runs...
 972          */
 973         if (mdrunOptions.numStepsCommandline > -2)
 974         {
 975             /* Temporarily set the number of steps to unlimited to avoid
 976              * triggering the nsteps check in load_checkpoint().
 977              * This hack will go away soon when the -nsteps option is removed.
 978              */
 979             inputrec->nsteps = -1;
 980         }
 981
 982         load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
 983                         logFileHandle,
 984                         cr, domdecOptions.numCells,
 985                         inputrec, globalState.get(),
 986                         &observablesHistory,
 987                         mdrunOptions.reproducible, mdModules_->notifier());
 988
 989         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
 990         {
 991             // Now we can start normal logging to the truncated log file.
 992             fplog    = gmx_fio_getfp(logFileHandle);
 993             prepareLogAppending(fplog);
 994             logOwner = buildLogger(fplog, MASTER(cr));
 995             mdlog    = logOwner.logger();
 996         }
 997     }
 998
 999     if (mdrunOptions.numStepsCommandline > -2)
1000     {
1001         GMX_LOG(mdlog.info).asParagraph().
1002             appendText("The -nsteps functionality is deprecated, and may be removed in a future version. "
1003                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field.");
1004     }
1005     /* override nsteps with value set on the commandline */
1006     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
1007
1008     if (SIMMASTER(cr))
1009     {
1010         copy_mat(globalState->box, box);
1011     }
1012
1013     if (PAR(cr))
1014     {
1015         gmx_bcast(sizeof(box), box, cr);
1016     }
1017
1018     if (inputrec->cutoff_scheme != ecutsVERLET)
1019     {
1020         gmx_fatal(FARGS, "This group-scheme .tpr file can no longer be run by mdrun. Please update to the Verlet scheme, or use an earlier version of GROMACS if necessary.");
1021     }
1022     /* Update rlist and nstlist. */
1023     prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
1024                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
1025
1026     const bool prefer1DAnd1PulseDD = (c_enableGpuHaloExchange && useGpuForNonbonded);
1027     // This builder is necessary while we have multi-part construction
1028     // of DD. Before DD is constructed, we use the existence of
1029     // the builder object to indicate that further construction of DD
1030     // is needed.
1031     std::unique_ptr<DomainDecompositionBuilder> ddBuilder;
1032     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
1033                      inputrec->eI == eiNM))
1034     {
1035         ddBuilder = std::make_unique<DomainDecompositionBuilder>
1036                 (mdlog, cr, domdecOptions, mdrunOptions,
1037                 prefer1DAnd1PulseDD, mtop, *inputrec,
1038                 box, positionsFromStatePointer(globalState.get()));
1039     }
1040     else
1041     {
1042         /* PME, if used, is done on all nodes with 1D decomposition */
1043         cr->npmenodes = 0;
1044         cr->duty      = (DUTY_PP | DUTY_PME);
1045
1046         if (inputrec->ePBC == epbcSCREW)
1047         {
1048             gmx_fatal(FARGS,
1049                       "pbc=screw is only implemented with domain decomposition");
1050         }
1051     }
1052
1053     // Produce the task assignment for this rank.
1054     GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
1055     GpuTaskAssignments        gpuTaskAssignments =
1056         gpuTaskAssignmentsBuilder.build(gpuIdsToUse,
1057                                         userGpuTaskAssignment,
1058                                         *hwinfo,
1059                                         cr,
1060                                         ms,
1061                                         physicalNodeComm,
1062                                         nonbondedTarget,
1063                                         pmeTarget,
1064                                         bondedTarget,
1065                                         updateTarget,
1066                                         useGpuForNonbonded,
1067                                         useGpuForPme,
1068                                         thisRankHasDuty(cr, DUTY_PP),
1069                                         // TODO cr->duty & DUTY_PME should imply that a PME
1070                                         // algorithm is active, but currently does not.
1071                                         EEL_PME(inputrec->coulombtype) &&
1072                                         thisRankHasDuty(cr, DUTY_PME));
1073
1074     const bool printHostName = (cr->nnodes > 1);
1075     gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode);
1076
1077     // If the user chose a task assignment, give them some hints
1078     // where appropriate.
1079     if (!userGpuTaskAssignment.empty())
1080     {
1081         gpuTaskAssignments.logPerformanceHints(mdlog,
1082                                                ssize(gpuIdsToUse));
1083     }
1084
1085     // Get the device handles for the modules, nullptr when no task is assigned.
1086     gmx_device_info_t *nonbondedDeviceInfo   = gpuTaskAssignments.initNonbondedDevice(cr);
1087     gmx_device_info_t *pmeDeviceInfo         = gpuTaskAssignments.initPmeDevice();
1088
1089     // TODO Initialize GPU streams here.
1090
1091     // TODO Currently this is always built, yet DD partition code
1092     // checks if it is built before using it. Probably it should
1093     // become an MDModule that is made only when another module
1094     // requires it (e.g. pull, CompEl, density fitting), so that we
1095     // don't update the local atom sets unilaterally every step.
1096     LocalAtomSetManager atomSets;
1097     if (ddBuilder)
1098     {
1099         // TODO Pass the GPU streams to ddBuilder to use in buffer
1100         // transfers (e.g. halo exchange)
1101         cr->dd = ddBuilder->build(&atomSets);
1102         // The builder's job is done, so destruct it
1103         ddBuilder.reset(nullptr);
1104         // Note that local state still does not exist yet.
1105     }
1106
1107     if (PAR(cr))
1108     {
1109         /* After possible communicator splitting in make_dd_communicators.
1110          * we can set up the intra/inter node communication.
1111          */
1112         gmx_setup_nodecomm(fplog, cr);
1113     }
1114
1115 #if GMX_MPI
1116     if (isMultiSim(ms))
1117     {
1118         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1119                 "This is simulation %d out of %d running as a composite GROMACS\n"
1120                 "multi-simulation job. Setup for this simulation:\n",
1121                 ms->sim, ms->nsim);
1122     }
1123     GMX_LOG(mdlog.warning).appendTextFormatted(
1124             "Using %d MPI %s\n",
1125             cr->nnodes,
1126 #if GMX_THREAD_MPI
1127             cr->nnodes == 1 ? "thread" : "threads"
1128 #else
1129             cr->nnodes == 1 ? "process" : "processes"
1130 #endif
1131             );
1132     fflush(stderr);
1133 #endif
1134
1135     // If mdrun -pin auto honors any affinity setting that already
1136     // exists. If so, it is nice to provide feedback about whether
1137     // that existing affinity setting was from OpenMP or something
1138     // else, so we run this code both before and after we initialize
1139     // the OpenMP support.
1140     gmx_check_thread_affinity_set(mdlog,
1141                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
1142     /* Check and update the number of OpenMP threads requested */
1143     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1144                                             pmeRunMode, mtop, *inputrec);
1145
1146     gmx_omp_nthreads_init(mdlog, cr,
1147                           hwinfo->nthreads_hw_avail,
1148                           physicalNodeComm.size_,
1149                           hw_opt.nthreads_omp,
1150                           hw_opt.nthreads_omp_pme,
1151                           !thisRankHasDuty(cr, DUTY_PP));
1152
1153     // Enable FP exception detection, but not in
1154     // Release mode and not for compilers with known buggy FP
1155     // exception support (clang with any optimization) or suspected
1156     // buggy FP exception support (gcc 7.* with optimization).
1157 #if !defined NDEBUG && \
1158     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1159     && defined __OPTIMIZE__)
1160     const bool bEnableFPE = true;
1161 #else
1162     const bool bEnableFPE = false;
1163 #endif
1164     //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1165     if (bEnableFPE)
1166     {
1167         gmx_feenableexcept();
1168     }
1169
1170     /* Now that we know the setup is consistent, check for efficiency */
1171     check_resource_division_efficiency(hwinfo,
1172                                        gpuTaskAssignments.thisRankHasAnyGpuTask(),
1173                                        mdrunOptions.ntompOptionIsSet,
1174                                        cr,
1175                                        mdlog);
1176
1177     /* getting number of PP/PME threads on this MPI / tMPI rank.
1178        PME: env variable should be read only on one node to make sure it is
1179        identical everywhere;
1180      */
1181     const int numThreadsOnThisRank =
1182         thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) : gmx_omp_nthreads_get(emntPME);
1183     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
1184                                   *hwinfo->hardwareTopology,
1185                                   physicalNodeComm, mdlog);
1186
1187     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1188     {
1189         /* Before setting affinity, check whether the affinity has changed
1190          * - which indicates that probably the OpenMP library has changed it
1191          * since we first checked).
1192          */
1193         gmx_check_thread_affinity_set(mdlog,
1194                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1195
1196         int numThreadsOnThisNode, intraNodeThreadOffset;
1197         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1198                                  &intraNodeThreadOffset);
1199
1200         /* Set the CPU affinity */
1201         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
1202                                 numThreadsOnThisRank, numThreadsOnThisNode,
1203                                 intraNodeThreadOffset, nullptr);
1204     }
1205
1206     if (mdrunOptions.timingOptions.resetStep > -1)
1207     {
1208         GMX_LOG(mdlog.info).asParagraph().
1209             appendText("The -resetstep functionality is deprecated, and may be removed in a future version.");
1210     }
1211     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1212
1213     if (PAR(cr))
1214     {
1215         /* Master synchronizes its value of reset_counters with all nodes
1216          * including PME only nodes */
1217         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1218         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
1219         wcycle_set_reset_counters(wcycle, reset_counters);
1220     }
1221
1222     // Membrane embedding must be initialized before we call init_forcerec()
1223     if (doMembed)
1224     {
1225         if (MASTER(cr))
1226         {
1227             fprintf(stderr, "Initializing membed");
1228         }
1229         /* Note that membed cannot work in parallel because mtop is
1230          * changed here. Fix this if we ever want to make it run with
1231          * multiple ranks. */
1232         membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr,
1233                              &mdrunOptions
1234                                  .checkpointOptions.period);
1235     }
1236
1237     const bool                   thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
1238     std::unique_ptr<MDAtoms>     mdAtoms;
1239     std::unique_ptr<gmx_vsite_t> vsite;
1240
1241     t_nrnb nrnb;
1242     if (thisRankHasDuty(cr, DUTY_PP))
1243     {
1244         mdModulesNotifier.notify(*cr);
1245         mdModulesNotifier.notify(&atomSets);
1246         mdModulesNotifier.notify(PeriodicBoundaryConditionType {inputrec->ePBC});
1247         /* Initiate forcerecord */
1248         fr                 = new t_forcerec;
1249         fr->forceProviders = mdModules_->initForceProviders();
1250         init_forcerec(fplog, mdlog, fr, fcd,
1251                       inputrec, &mtop, cr, box,
1252                       opt2fn("-table", filenames.size(), filenames.data()),
1253                       opt2fn("-tablep", filenames.size(), filenames.data()),
1254                       opt2fns("-tableb", filenames.size(), filenames.data()),
1255                       *hwinfo, nonbondedDeviceInfo,
1256                       useGpuForBonded,
1257                       pforce,
1258                       wcycle);
1259
1260         // TODO Move this to happen during domain decomposition setup,
1261         // once stream and event handling works well with that.
1262         // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
1263         if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
1264         {
1265             GMX_RELEASE_ASSERT(c_enableGpuBufOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
1266             void *streamLocal                   = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1267             void *streamNonLocal                =
1268                 Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1269             void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
1270             GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1271                     "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the GMX_GPU_DD_COMMS environment variable.");
1272             cr->dd->gpuHaloExchange = std::make_unique<GpuHaloExchange>(cr->dd, cr->mpi_comm_mysim, streamLocal,
1273                                                                         streamNonLocal, coordinatesOnDeviceEvent);
1274         }
1275
1276         /* Initialize the mdAtoms structure.
1277          * mdAtoms is not filled with atom data,
1278          * as this can not be done now with domain decomposition.
1279          */
1280         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1281         if (globalState && thisRankHasPmeGpuTask)
1282         {
1283             // The pinning of coordinates in the global state object works, because we only use
1284             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1285             // points to the global state object without DD.
1286             // FIXME: MD and EM separately set up the local state - this should happen in the same function,
1287             // which should also perform the pinning.
1288             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1289         }
1290
1291         /* Initialize the virtual site communication */
1292         vsite = initVsite(mtop, cr);
1293
1294         calc_shifts(box, fr->shift_vec);
1295
1296         /* With periodic molecules the charge groups should be whole at start up
1297          * and the virtual sites should not be far from their proper positions.
1298          */
1299         if (!inputrec->bContinuation && MASTER(cr) &&
1300             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
1301         {
1302             /* Make molecules whole at start of run */
1303             if (fr->ePBC != epbcNONE)
1304             {
1305                 do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
1306             }
1307             if (vsite)
1308             {
1309                 /* Correct initial vsite positions are required
1310                  * for the initial distribution in the domain decomposition
1311                  * and for the initial shell prediction.
1312                  */
1313                 constructVsitesGlobal(mtop, globalState->x);
1314             }
1315         }
1316
1317         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1318         {
1319             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1320             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1321         }
1322     }
1323     else
1324     {
1325         /* This is a PME only node */
1326
1327         GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized");
1328
1329         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1330         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1331     }
1332
1333     gmx_pme_t *sepPmeData = nullptr;
1334     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1335     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
1336     gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
1337
1338     // TODO should live in ewald module once its testing is improved
1339     //
1340     // Later, this program could contain kernels that might be later
1341     // re-used as auto-tuning progresses, or subsequent simulations
1342     // are invoked.
1343     PmeGpuProgramStorage pmeGpuProgram;
1344     if (thisRankHasPmeGpuTask)
1345     {
1346         pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
1347     }
1348
1349     /* Initiate PME if necessary,
1350      * either on all nodes or on dedicated PME nodes only. */
1351     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1352     {
1353         if (mdAtoms && mdAtoms->mdatoms())
1354         {
1355             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1356             if (EVDW_PME(inputrec->vdwtype))
1357             {
1358                 nTypePerturbed   = mdAtoms->mdatoms()->nTypePerturbed;
1359             }
1360         }
1361         if (cr->npmenodes > 0)
1362         {
1363             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1364             gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
1365             gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
1366         }
1367
1368         if (thisRankHasDuty(cr, DUTY_PME))
1369         {
1370             try
1371             {
1372                 pmedata = gmx_pme_init(cr,
1373                                        getNumPmeDomains(cr->dd),
1374                                        inputrec,
1375                                        nChargePerturbed != 0, nTypePerturbed != 0,
1376                                        mdrunOptions.reproducible,
1377                                        ewaldcoeff_q, ewaldcoeff_lj,
1378                                        gmx_omp_nthreads_get(emntPME),
1379                                        pmeRunMode, nullptr,
1380                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
1381             }
1382             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1383         }
1384     }
1385
1386
1387     if (EI_DYNAMICS(inputrec->eI))
1388     {
1389         /* Turn on signal handling on all nodes */
1390         /*
1391          * (A user signal from the PME nodes (if any)
1392          * is communicated to the PP nodes.
1393          */
1394         signal_handler_install();
1395     }
1396
1397     pull_t *pull_work = nullptr;
1398     if (thisRankHasDuty(cr, DUTY_PP))
1399     {
1400         /* Assumes uniform use of the number of OpenMP threads */
1401         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1402
1403         if (inputrec->bPull)
1404         {
1405             /* Initialize pull code */
1406             pull_work =
1407                 init_pull(fplog, inputrec->pull, inputrec,
1408                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
1409             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1410             {
1411                 initPullHistory(pull_work, &observablesHistory);
1412             }
1413             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1414             {
1415                 init_pull_output_files(pull_work,
1416                                        filenames.size(), filenames.data(), oenv,
1417                                        startingBehavior);
1418             }
1419         }
1420
1421         std::unique_ptr<EnforcedRotation> enforcedRotation;
1422         if (inputrec->bRot)
1423         {
1424             /* Initialize enforced rotation code */
1425             enforcedRotation = init_rot(fplog,
1426                                         inputrec,
1427                                         filenames.size(),
1428                                         filenames.data(),
1429                                         cr,
1430                                         &atomSets,
1431                                         globalState.get(),
1432                                         &mtop,
1433                                         oenv,
1434                                         mdrunOptions,
1435                                         startingBehavior);
1436         }
1437
1438         t_swap *swap = nullptr;
1439         if (inputrec->eSwapCoords != eswapNO)
1440         {
1441             /* Initialize ion swapping code */
1442             swap = init_swapcoords(fplog, inputrec,
1443                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1444                                    &mtop, globalState.get(), &observablesHistory,
1445                                    cr, &atomSets, oenv, mdrunOptions,
1446                                    startingBehavior);
1447         }
1448
1449         /* Let makeConstraints know whether we have essential dynamics constraints.
1450          * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint
1451          */
1452         bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr
1453                                     || observablesHistory.edsamHistory);
1454         auto constr              = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics,
1455                                                    fplog, *mdAtoms->mdatoms(),
1456                                                    cr, ms, &nrnb, wcycle, fr->bMolPBC);
1457
1458         /* Energy terms and groups */
1459         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), inputrec->fepvals->n_lambda);
1460
1461         /* Kinetic energy data */
1462         gmx_ekindata_t ekind;
1463         init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
1464
1465         /* Set up interactive MD (IMD) */
1466         auto imdSession = makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
1467                                          MASTER(cr) ? globalState->x.rvec_array() : nullptr,
1468                                          filenames.size(), filenames.data(), oenv, mdrunOptions.imdOptions,
1469                                          startingBehavior);
1470
1471         if (DOMAINDECOMP(cr))
1472         {
1473             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1474             /* This call is not included in init_domain_decomposition mainly
1475              * because fr->cginfo_mb is set later.
1476              */
1477             dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
1478                             domdecOptions.checkBondedInteractions,
1479                             fr->cginfo_mb);
1480         }
1481
1482         if (updateTarget == TaskTarget::Gpu)
1483         {
1484             if (SIMMASTER(cr))
1485             {
1486                 gmx_fatal(FARGS, "It is currently not possible to redirect the calculation "
1487                           "of update and constraints to the GPU!");
1488             }
1489         }
1490
1491         // Before we start the actual simulator, try if we can run the update task on the GPU.
1492         useGpuForUpdate = decideWhetherToUseGpuForUpdate(DOMAINDECOMP(cr),
1493                                                          useGpuForPme,
1494                                                          useGpuForNonbonded,
1495                                                          c_enableGpuBufOps,
1496                                                          updateTarget,
1497                                                          gpusWereDetected,
1498                                                          *inputrec,
1499                                                          *mdAtoms,
1500                                                          doEssentialDynamics,
1501                                                          fcd->orires.nr != 0,
1502                                                          fcd->disres.nsystems != 0);
1503
1504         // TODO This is not the right place to manage the lifetime of
1505         // this data structure, but currently it's the easiest way to
1506         // make it work.
1507         MdrunScheduleWorkload runScheduleWork;
1508
1509         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1510         SimulatorBuilder simulatorBuilder;
1511
1512         // build and run simulator object based on user-input
1513         const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
1514                     false,
1515                     inputrec, doRerun, vsite.get(), ms, replExParams,
1516                     fcd, static_cast<int>(filenames.size()), filenames.data(),
1517                     &observablesHistory, membed);
1518         auto simulator = simulatorBuilder.build(
1519                     inputIsCompatibleWithModularSimulator,
1520                     fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(),
1521                     oenv,
1522                     mdrunOptions,
1523                     startingBehavior,
1524                     vsite.get(), constr.get(),
1525                     enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1526                     deform.get(),
1527                     mdModules_->outputProvider(),
1528                     mdModules_->notifier(),
1529                     inputrec, imdSession.get(), pull_work, swap, &mtop,
1530                     fcd,
1531                     globalState.get(),
1532                     &observablesHistory,
1533                     mdAtoms.get(), &nrnb, wcycle, fr,
1534                     &enerd,
1535                     &ekind,
1536                     &runScheduleWork,
1537                     replExParams,
1538                     membed,
1539                     walltime_accounting,
1540                     std::move(stopHandlerBuilder_),
1541                     doRerun,
1542                     useGpuForUpdate);
1543         simulator->run();
1544
1545         if (inputrec->bPull)
1546         {
1547             finish_pull(pull_work);
1548         }
1549         finish_swapcoords(swap);
1550     }
1551     else
1552     {
1553         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1554         /* do PME only */
1555         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1556         gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
1557     }
1558
1559     wallcycle_stop(wcycle, ewcRUN);
1560
1561     /* Finish up, write some stuff
1562      * if rerunMD, don't write last frame again
1563      */
1564     finish_run(fplog, mdlog, cr,
1565                inputrec, &nrnb, wcycle, walltime_accounting,
1566                fr ? fr->nbv.get() : nullptr,
1567                pmedata,
1568                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1569
1570     // clean up cycle counter
1571     wallcycle_destroy(wcycle);
1572
1573 // Free PME data
1574     if (pmedata)
1575     {
1576         gmx_pme_destroy(pmedata);
1577         pmedata = nullptr;
1578     }
1579
1580     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1581     // before we destroy the GPU context(s) in free_gpu_resources().
1582     // Pinned buffers are associated with contexts in CUDA.
1583     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1584     mdAtoms.reset(nullptr);
1585     globalState.reset(nullptr);
1586     mdModules_.reset(nullptr);   // destruct force providers here as they might also use the GPU
1587
1588     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
1589     free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
1590     free_gpu(nonbondedDeviceInfo);
1591     free_gpu(pmeDeviceInfo);
1592     done_forcerec(fr, mtop.molblock.size());
1593     sfree(fcd);
1594
1595     if (doMembed)
1596     {
1597         free_membed(membed);
1598     }
1599
1600     /* Does what it says */
1601     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1602     walltime_accounting_destroy(walltime_accounting);
1603
1604     // Ensure log file content is written
1605     if (logFileHandle)
1606     {
1607         gmx_fio_flush(logFileHandle);
1608     }
1609
1610     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1611      * exceptions were enabled before function was called. */
1612     if (bEnableFPE)
1613     {
1614         gmx_fedisableexcept();
1615     }
1616
1617     auto rc = static_cast<int>(gmx_get_stop_condition());
1618
1619 #if GMX_THREAD_MPI
1620     /* we need to join all threads. The sub-threads join when they
1621        exit this function, but the master thread needs to be told to
1622        wait for that. */
1623     if (PAR(cr) && MASTER(cr))
1624     {
1625         tMPI_Finalize();
1626     }
1627 #endif
1628     return rc;
1629 }
1630
1631 Mdrunner::~Mdrunner()
1632 {
1633     // Clean up of the Manager.
1634     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1635     // but okay as long as threads synchronize some time before adding or accessing
1636     // a new set of restraints.
1637     if (restraintManager_)
1638     {
1639         restraintManager_->clear();
1640         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1641                    "restraints added during runner life time should be cleared at runner destruction.");
1642     }
1643 };
1644
1645 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller,
1646                             const std::string                        &name)
1647 {
1648     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1649     // Not sure if this should be logged through the md logger or something else,
1650     // but it is helpful to have some sort of INFO level message sent somewhere.
1651     //    std::cout << "Registering restraint named " << name << std::endl;
1652
1653     // When multiple restraints are used, it may be wasteful to register them separately.
1654     // Maybe instead register an entire Restraint Manager as a force provider.
1655     restraintManager_->addToSpec(std::move(puller),
1656                                  name);
1657 }
1658
1659 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules)
1660     : mdModules_(std::move(mdModules))
1661 {
1662 }
1663
1664 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
1665
1666 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
1667 Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
1668
1669 class Mdrunner::BuilderImplementation
1670 {
1671     public:
1672         BuilderImplementation() = delete;
1673         BuilderImplementation(std::unique_ptr<MDModules>           mdModules,
1674                               compat::not_null<SimulationContext*> context);
1675         ~BuilderImplementation();
1676
1677         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
1678                                                     real                forceWarningThreshold,
1679                                                     StartingBehavior    startingBehavior);
1680
1681         void addDomdec(const DomdecOptions &options);
1682
1683         void addVerletList(int nstlist);
1684
1685         void addReplicaExchange(const ReplicaExchangeParameters &params);
1686
1687         void addNonBonded(const char* nbpu_opt);
1688
1689         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1690
1691         void addBondedTaskAssignment(const char* bonded_opt);
1692
1693         void addUpdateTaskAssignment(const char* update_opt);
1694
1695         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
1696
1697         void addFilenames(ArrayRef <const t_filenm> filenames);
1698
1699         void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1700
1701         void addLogFile(t_fileio *logFileHandle);
1702
1703         void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1704
1705         Mdrunner build();
1706
1707     private:
1708
1709         // Default parameters copied from runner.h
1710         // \todo Clarify source(s) of default parameters.
1711
1712         const char* nbpu_opt_          = nullptr;
1713         const char* pme_opt_           = nullptr;
1714         const char* pme_fft_opt_       = nullptr;
1715         const char *bonded_opt_        = nullptr;
1716         const char *update_opt_        = nullptr;
1717
1718         MdrunOptions                          mdrunOptions_;
1719
1720         DomdecOptions                         domdecOptions_;
1721
1722         ReplicaExchangeParameters             replicaExchangeParameters_;
1723
1724         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1725         int         nstlist_ = 0;
1726
1727         //! Multisim communicator handle.
1728         gmx_multisim_t *multiSimulation_;
1729
1730         //! mdrun communicator
1731         MPI_Comm communicator_ = MPI_COMM_NULL;
1732
1733         //! Print a warning if any force is larger than this (in kJ/mol nm).
1734         real forceWarningThreshold_ = -1;
1735
1736         //! Whether the simulation will start afresh, or restart with/without appending.
1737         StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1738
1739         //! The modules that comprise the functionality of mdrun.
1740         std::unique_ptr<MDModules> mdModules_;
1741
1742         //! \brief Parallelism information.
1743         gmx_hw_opt_t hardwareOptions_;
1744
1745         //! filename options for simulation.
1746         ArrayRef<const t_filenm> filenames_;
1747
1748         /*! \brief Handle to output environment.
1749          *
1750          * \todo gmx_output_env_t needs lifetime management.
1751          */
1752         gmx_output_env_t*    outputEnvironment_ = nullptr;
1753
1754         /*! \brief Non-owning handle to MD log file.
1755          *
1756          * \todo Context should own output facilities for client.
1757          * \todo Improve log file handle management.
1758          * \internal
1759          * Code managing the FILE* relies on the ability to set it to
1760          * nullptr to check whether the filehandle is valid.
1761          */
1762         t_fileio* logFileHandle_ = nullptr;
1763
1764         /*!
1765          * \brief Builder for simulation stop signal handler.
1766          */
1767         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1768 };
1769
1770 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules>           mdModules,
1771                                                        compat::not_null<SimulationContext*> context) :
1772     mdModules_(std::move(mdModules))
1773 {
1774     communicator_    = context->communicator_;
1775     multiSimulation_ = context->multiSimulation_.get();
1776 }
1777
1778 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
1779
1780 Mdrunner::BuilderImplementation &
1781 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions    &options,
1782                                                       const real             forceWarningThreshold,
1783                                                       const StartingBehavior startingBehavior)
1784 {
1785     mdrunOptions_          = options;
1786     forceWarningThreshold_ = forceWarningThreshold;
1787     startingBehavior_      = startingBehavior;
1788     return *this;
1789 }
1790
1791 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options)
1792 {
1793     domdecOptions_ = options;
1794 }
1795
1796 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
1797 {
1798     nstlist_ = nstlist;
1799 }
1800
1801 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters &params)
1802 {
1803     replicaExchangeParameters_ = params;
1804 }
1805
1806 Mdrunner Mdrunner::BuilderImplementation::build()
1807 {
1808     auto newRunner = Mdrunner(std::move(mdModules_));
1809
1810     newRunner.mdrunOptions          = mdrunOptions_;
1811     newRunner.pforce                = forceWarningThreshold_;
1812     newRunner.startingBehavior      = startingBehavior_;
1813     newRunner.domdecOptions         = domdecOptions_;
1814
1815     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
1816     newRunner.hw_opt          = hardwareOptions_;
1817
1818     // No invariant to check. This parameter exists to optionally override other behavior.
1819     newRunner.nstlist_cmdline = nstlist_;
1820
1821     newRunner.replExParams    = replicaExchangeParameters_;
1822
1823     newRunner.filenames = filenames_;
1824
1825     newRunner.communicator = communicator_;
1826
1827     // nullptr is a valid value for the multisim handle
1828     newRunner.ms = multiSimulation_;
1829
1830     // \todo Clarify ownership and lifetime management for gmx_output_env_t
1831     // \todo Update sanity checking when output environment has clearly specified invariants.
1832     // Initialization and default values for oenv are not well specified in the current version.
1833     if (outputEnvironment_)
1834     {
1835         newRunner.oenv  = outputEnvironment_;
1836     }
1837     else
1838     {
1839         GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()"));
1840     }
1841
1842     newRunner.logFileHandle = logFileHandle_;
1843
1844     if (nbpu_opt_)
1845     {
1846         newRunner.nbpu_opt    = nbpu_opt_;
1847     }
1848     else
1849     {
1850         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
1851     }
1852
1853     if (pme_opt_ && pme_fft_opt_)
1854     {
1855         newRunner.pme_opt     = pme_opt_;
1856         newRunner.pme_fft_opt = pme_fft_opt_;
1857     }
1858     else
1859     {
1860         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
1861     }
1862
1863     if (bonded_opt_)
1864     {
1865         newRunner.bonded_opt = bonded_opt_;
1866     }
1867     else
1868     {
1869         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
1870     }
1871
1872     if (update_opt_)
1873     {
1874         newRunner.update_opt = update_opt_;
1875     }
1876     else
1877     {
1878         GMX_THROW(gmx::APIError("MdrunnerBuilder::addUpdateTaskAssignment() is required before build()  "));
1879     }
1880
1881
1882     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
1883
1884     if (stopHandlerBuilder_)
1885     {
1886         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
1887     }
1888     else
1889     {
1890         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
1891     }
1892
1893     return newRunner;
1894 }
1895
1896 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
1897 {
1898     nbpu_opt_ = nbpu_opt;
1899 }
1900
1901 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
1902                                              const char* pme_fft_opt)
1903 {
1904     pme_opt_     = pme_opt;
1905     pme_fft_opt_ = pme_fft_opt;
1906 }
1907
1908 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
1909 {
1910     bonded_opt_ = bonded_opt;
1911 }
1912
1913 void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
1914 {
1915     update_opt_ = update_opt;
1916 }
1917
1918 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1919 {
1920     hardwareOptions_ = hardwareOptions;
1921 }
1922
1923 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
1924 {
1925     filenames_ = filenames;
1926 }
1927
1928 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1929 {
1930     outputEnvironment_ = outputEnvironment;
1931 }
1932
1933 void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle)
1934 {
1935     logFileHandle_ = logFileHandle;
1936 }
1937
1938 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
1939 {
1940     stopHandlerBuilder_ = std::move(builder);
1941 }
1942
1943 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
1944                                  compat::not_null<SimulationContext*> context) :
1945     impl_ {std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context)}
1946 {
1947 }
1948
1949 MdrunnerBuilder::~MdrunnerBuilder() = default;
1950
1951 MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions    &options,
1952                                                       real                   forceWarningThreshold,
1953                                                       const StartingBehavior startingBehavior)
1954 {
1955     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
1956     return *this;
1957 }
1958
1959 MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options)
1960 {
1961     impl_->addDomdec(options);
1962     return *this;
1963 }
1964
1965 MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist)
1966 {
1967     impl_->addVerletList(nstlist);
1968     return *this;
1969 }
1970
1971 MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters &params)
1972 {
1973     impl_->addReplicaExchange(params);
1974     return *this;
1975 }
1976
1977 MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
1978 {
1979     impl_->addNonBonded(nbpu_opt);
1980     return *this;
1981 }
1982
1983 MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
1984                                                     const char* pme_fft_opt)
1985 {
1986     // The builder method may become more general in the future, but in this version,
1987     // parameters for PME electrostatics are both required and the only parameters
1988     // available.
1989     if (pme_opt && pme_fft_opt)
1990     {
1991         impl_->addPME(pme_opt, pme_fft_opt);
1992     }
1993     else
1994     {
1995         GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
1996     }
1997     return *this;
1998 }
1999
2000 MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
2001 {
2002     impl_->addBondedTaskAssignment(bonded_opt);
2003     return *this;
2004 }
2005
2006 MdrunnerBuilder &MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
2007 {
2008     impl_->addUpdateTaskAssignment(update_opt);
2009     return *this;
2010 }
2011
2012 Mdrunner MdrunnerBuilder::build()
2013 {
2014     return impl_->build();
2015 }
2016
2017 MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
2018 {
2019     impl_->addHardwareOptions(hardwareOptions);
2020     return *this;
2021 }
2022
2023 MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2024 {
2025     impl_->addFilenames(filenames);
2026     return *this;
2027 }
2028
2029 MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2030 {
2031     impl_->addOutputEnvironment(outputEnvironment);
2032     return *this;
2033 }
2034
2035 MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle)
2036 {
2037     impl_->addLogFile(logFileHandle);
2038     return *this;
2039 }
2040
2041 MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2042 {
2043     impl_->addStopHandlerBuilder(std::move(builder));
2044     return *this;
2045 }
2046
2047 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default;
2048
2049 MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default;
2050
2051 } // namespace gmx