src/gromacs/mdrun/runner.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the MD runner routine calling all integrators.
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include "runner.h"
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cinttypes>
  52 #include <csignal>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57 #include <memory>
  58
  59 #include "gromacs/commandline/filenm.h"
  60 #include "gromacs/domdec/domdec.h"
  61 #include "gromacs/domdec/domdec_struct.h"
  62 #include "gromacs/domdec/localatomsetmanager.h"
  63 #include "gromacs/domdec/partition.h"
  64 #include "gromacs/ewald/ewald_utils.h"
  65 #include "gromacs/ewald/pme.h"
  66 #include "gromacs/ewald/pme_gpu_program.h"
  67 #include "gromacs/fileio/checkpoint.h"
  68 #include "gromacs/fileio/gmxfio.h"
  69 #include "gromacs/fileio/oenv.h"
  70 #include "gromacs/fileio/tpxio.h"
  71 #include "gromacs/gmxlib/network.h"
  72 #include "gromacs/gmxlib/nrnb.h"
  73 #include "gromacs/gpu_utils/clfftinitializer.h"
  74 #include "gromacs/gpu_utils/gpu_utils.h"
  75 #include "gromacs/hardware/cpuinfo.h"
  76 #include "gromacs/hardware/detecthardware.h"
  77 #include "gromacs/hardware/printhardware.h"
  78 #include "gromacs/imd/imd.h"
  79 #include "gromacs/listed_forces/disre.h"
  80 #include "gromacs/listed_forces/gpubonded.h"
  81 #include "gromacs/listed_forces/orires.h"
  82 #include "gromacs/math/functions.h"
  83 #include "gromacs/math/utilities.h"
  84 #include "gromacs/math/vec.h"
  85 #include "gromacs/mdlib/boxdeformation.h"
  86 #include "gromacs/mdlib/broadcaststructs.h"
  87 #include "gromacs/mdlib/calc_verletbuf.h"
  88 #include "gromacs/mdlib/dispersioncorrection.h"
  89 #include "gromacs/mdlib/enerdata_utils.h"
  90 #include "gromacs/mdlib/force.h"
  91 #include "gromacs/mdlib/forcerec.h"
  92 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  93 #include "gromacs/mdlib/makeconstraints.h"
  94 #include "gromacs/mdlib/md_support.h"
  95 #include "gromacs/mdlib/mdatoms.h"
  96 #include "gromacs/mdlib/membed.h"
  97 #include "gromacs/mdlib/ppforceworkload.h"
  98 #include "gromacs/mdlib/qmmm.h"
  99 #include "gromacs/mdlib/sighandler.h"
 100 #include "gromacs/mdlib/stophandler.h"
 101 #include "gromacs/mdrun/mdmodules.h"
 102 #include "gromacs/mdrun/simulationcontext.h"
 103 #include "gromacs/mdrunutility/handlerestart.h"
 104 #include "gromacs/mdrunutility/logging.h"
 105 #include "gromacs/mdrunutility/multisim.h"
 106 #include "gromacs/mdrunutility/printtime.h"
 107 #include "gromacs/mdrunutility/threadaffinity.h"
 108 #include "gromacs/mdtypes/commrec.h"
 109 #include "gromacs/mdtypes/enerdata.h"
 110 #include "gromacs/mdtypes/fcdata.h"
 111 #include "gromacs/mdtypes/inputrec.h"
 112 #include "gromacs/mdtypes/md_enums.h"
 113 #include "gromacs/mdtypes/mdrunoptions.h"
 114 #include "gromacs/mdtypes/observableshistory.h"
 115 #include "gromacs/mdtypes/state.h"
 116 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 117 #include "gromacs/nbnxm/nbnxm.h"
 118 #include "gromacs/nbnxm/pairlist_tuning.h"
 119 #include "gromacs/pbcutil/pbc.h"
 120 #include "gromacs/pulling/output.h"
 121 #include "gromacs/pulling/pull.h"
 122 #include "gromacs/pulling/pull_rotation.h"
 123 #include "gromacs/restraint/manager.h"
 124 #include "gromacs/restraint/restraintmdmodule.h"
 125 #include "gromacs/restraint/restraintpotential.h"
 126 #include "gromacs/swap/swapcoords.h"
 127 #include "gromacs/taskassignment/decidegpuusage.h"
 128 #include "gromacs/taskassignment/resourcedivision.h"
 129 #include "gromacs/taskassignment/taskassignment.h"
 130 #include "gromacs/taskassignment/usergpuids.h"
 131 #include "gromacs/timing/gpu_timing.h"
 132 #include "gromacs/timing/wallcycle.h"
 133 #include "gromacs/timing/wallcyclereporting.h"
 134 #include "gromacs/topology/mtop_util.h"
 135 #include "gromacs/trajectory/trajectoryframe.h"
 136 #include "gromacs/utility/basenetwork.h"
 137 #include "gromacs/utility/cstringutil.h"
 138 #include "gromacs/utility/exceptions.h"
 139 #include "gromacs/utility/fatalerror.h"
 140 #include "gromacs/utility/filestream.h"
 141 #include "gromacs/utility/gmxassert.h"
 142 #include "gromacs/utility/gmxmpi.h"
 143 #include "gromacs/utility/logger.h"
 144 #include "gromacs/utility/loggerbuilder.h"
 145 #include "gromacs/utility/physicalnodecommunicator.h"
 146 #include "gromacs/utility/pleasecite.h"
 147 #include "gromacs/utility/programcontext.h"
 148 #include "gromacs/utility/smalloc.h"
 149 #include "gromacs/utility/stringutil.h"
 150
 151 #include "legacysimulator.h"
 152 #include "replicaexchange.h"
 153
 154 #if GMX_FAHCORE
 155 #include "corewrap.h"
 156 #endif
 157
 158 namespace gmx
 159 {
 160
 161 /*! \brief Log if development feature flags are encountered
 162  *
 163  * The use of dev features indicated by environment variables is logged
 164  * in order to ensure that runs with such featrues enabled can be identified
 165  * from their log and standard output.
 166  *
 167  * \param[in]  mdlog        Logger object.
 168  */
 169 static void reportDevelopmentFeatures(const gmx::MDLogger &mdlog)
 170 {
 171     const bool enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 172     const bool useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
 173
 174     if (enableGpuBufOps)
 175     {
 176         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 177                 "NOTE: This run uses the 'GPU buffer ops' feature, enabled by the GMX_USE_GPU_BUFFER_OPS environment variable.");
 178     }
 179
 180     if (useGpuUpdateConstrain)
 181     {
 182         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 183                 "NOTE: This run uses the 'GPU update/constraints' feature, enabled by the GMX_UPDATE_CONSTRAIN_GPU environment variable.");
 184     }
 185 }
 186
 187 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
 188  *
 189  * Used to ensure that the master thread does not modify mdrunner during copy
 190  * on the spawned threads. */
 191 static void threadMpiMdrunnerAccessBarrier()
 192 {
 193 #if GMX_THREAD_MPI
 194     MPI_Barrier(MPI_COMM_WORLD);
 195 #endif
 196 }
 197
 198 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 199 {
 200     auto newRunner = Mdrunner(std::make_unique<MDModules>());
 201
 202     // All runners in the same process share a restraint manager resource because it is
 203     // part of the interface to the client code, which is associated only with the
 204     // original thread. Handles to the same resources can be obtained by copy.
 205     {
 206         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
 207     }
 208
 209     // Copy original cr pointer before master thread can pass the thread barrier
 210     newRunner.cr  = reinitialize_commrec_for_this_thread(cr);
 211
 212     // Copy members of master runner.
 213     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 214     // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
 215     newRunner.hw_opt    = hw_opt;
 216     newRunner.filenames = filenames;
 217
 218     newRunner.oenv                = oenv;
 219     newRunner.mdrunOptions        = mdrunOptions;
 220     newRunner.domdecOptions       = domdecOptions;
 221     newRunner.nbpu_opt            = nbpu_opt;
 222     newRunner.pme_opt             = pme_opt;
 223     newRunner.pme_fft_opt         = pme_fft_opt;
 224     newRunner.bonded_opt          = bonded_opt;
 225     newRunner.nstlist_cmdline     = nstlist_cmdline;
 226     newRunner.replExParams        = replExParams;
 227     newRunner.pforce              = pforce;
 228     newRunner.ms                  = ms;
 229     newRunner.startingBehavior    = startingBehavior;
 230     newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 231
 232     threadMpiMdrunnerAccessBarrier();
 233
 234     GMX_RELEASE_ASSERT(!MASTER(newRunner.cr), "cloneOnSpawnedThread should only be called on spawned threads");
 235
 236     return newRunner;
 237 }
 238
 239 /*! \brief The callback used for running on spawned threads.
 240  *
 241  * Obtains the pointer to the master mdrunner object from the one
 242  * argument permitted to the thread-launch API call, copies it to make
 243  * a new runner for this thread, reinitializes necessary data, and
 244  * proceeds to the simulation. */
 245 static void mdrunner_start_fn(const void *arg)
 246 {
 247     try
 248     {
 249         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
 250         /* copy the arg list to make sure that it's thread-local. This
 251            doesn't copy pointed-to items, of course; fnm, cr and fplog
 252            are reset in the call below, all others should be const. */
 253         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
 254         mdrunner.mdrunner();
 255     }
 256     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 257 }
 258
 259
 260 /*! \brief Start thread-MPI threads.
 261  *
 262  * Called by mdrunner() to start a specific number of threads
 263  * (including the main thread) for thread-parallel runs. This in turn
 264  * calls mdrunner() for each thread. All options are the same as for
 265  * mdrunner(). */
 266 t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
 267 {
 268
 269     /* first check whether we even need to start tMPI */
 270     if (numThreadsToLaunch < 2)
 271     {
 272         return cr;
 273     }
 274
 275 #if GMX_THREAD_MPI
 276     /* now spawn new threads that start mdrunner_start_fn(), while
 277        the main thread returns. Thread affinity is handled later. */
 278     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
 279                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
 280     {
 281         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
 282     }
 283
 284     threadMpiMdrunnerAccessBarrier();
 285 #else
 286     GMX_UNUSED_VALUE(mdrunner_start_fn);
 287 #endif
 288
 289     return reinitialize_commrec_for_this_thread(cr);
 290 }
 291
 292 }  // namespace gmx
 293
 294 /*! \brief Initialize variables for Verlet scheme simulation */
 295 static void prepare_verlet_scheme(FILE                           *fplog,
 296                                   t_commrec                      *cr,
 297                                   t_inputrec                     *ir,
 298                                   int                             nstlist_cmdline,
 299                                   const gmx_mtop_t               *mtop,
 300                                   const matrix                    box,
 301                                   bool                            makeGpuPairList,
 302                                   const gmx::CpuInfo             &cpuinfo)
 303 {
 304     /* For NVE simulations, we will retain the initial list buffer */
 305     if (EI_DYNAMICS(ir->eI) &&
 306         ir->verletbuf_tol > 0 &&
 307         !(EI_MD(ir->eI) && ir->etc == etcNO))
 308     {
 309         /* Update the Verlet buffer size for the current run setup */
 310
 311         /* Here we assume SIMD-enabled kernels are being used. But as currently
 312          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 313          * and 4x2 gives a larger buffer than 4x4, this is ok.
 314          */
 315         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
 316         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 317
 318         const real         rlist_new =
 319             calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
 320
 321         if (rlist_new != ir->rlist)
 322         {
 323             if (fplog != nullptr)
 324             {
 325                 fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 326                         ir->rlist, rlist_new,
 327                         listSetup.cluster_size_i, listSetup.cluster_size_j);
 328             }
 329             ir->rlist     = rlist_new;
 330         }
 331     }
 332
 333     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 334     {
 335         gmx_fatal(FARGS, "Can not set nstlist without %s",
 336                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 337     }
 338
 339     if (EI_DYNAMICS(ir->eI))
 340     {
 341         /* Set or try nstlist values */
 342         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
 343     }
 344 }
 345
 346 /*! \brief Override the nslist value in inputrec
 347  *
 348  * with value passed on the command line (if any)
 349  */
 350 static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
 351                                     int64_t              nsteps_cmdline,
 352                                     t_inputrec          *ir)
 353 {
 354     assert(ir);
 355
 356     /* override with anything else than the default -2 */
 357     if (nsteps_cmdline > -2)
 358     {
 359         char sbuf_steps[STEPSTRSIZE];
 360         char sbuf_msg[STRLEN];
 361
 362         ir->nsteps = nsteps_cmdline;
 363         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
 364         {
 365             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
 366                     gmx_step_str(nsteps_cmdline, sbuf_steps),
 367                     fabs(nsteps_cmdline*ir->delta_t));
 368         }
 369         else
 370         {
 371             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
 372                     gmx_step_str(nsteps_cmdline, sbuf_steps));
 373         }
 374
 375         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
 376     }
 377     else if (nsteps_cmdline < -2)
 378     {
 379         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64,
 380                   nsteps_cmdline);
 381     }
 382     /* Do nothing if nsteps_cmdline == -2 */
 383 }
 384
 385 namespace gmx
 386 {
 387
 388 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
 389  *
 390  * If not, and if a warning may be issued, logs a warning about
 391  * falling back to CPU code. With thread-MPI, only the first
 392  * call to this function should have \c issueWarning true. */
 393 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
 394                                                const t_inputrec *ir,
 395                                                bool              issueWarning)
 396 {
 397     if (ir->opts.ngener - ir->nwall > 1)
 398     {
 399         /* The GPU code does not support more than one energy group.
 400          * If the user requested GPUs explicitly, a fatal error is given later.
 401          */
 402         if (issueWarning)
 403         {
 404             GMX_LOG(mdlog.warning).asParagraph()
 405                 .appendText("Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
 406                             "For better performance, run on the GPU without energy groups and then do "
 407                             "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.");
 408         }
 409         return false;
 410     }
 411     return true;
 412 }
 413
 414 //! Initializes the logger for mdrun.
 415 static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr)
 416 {
 417     gmx::LoggerBuilder builder;
 418     if (fplog != nullptr)
 419     {
 420         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
 421     }
 422     if (cr == nullptr || SIMMASTER(cr))
 423     {
 424         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning,
 425                                 &gmx::TextOutputFile::standardError());
 426     }
 427     return builder.build();
 428 }
 429
 430 //! Make a TaskTarget from an mdrun argument string.
 431 static TaskTarget findTaskTarget(const char *optionString)
 432 {
 433     TaskTarget returnValue = TaskTarget::Auto;
 434
 435     if (strncmp(optionString, "auto", 3) == 0)
 436     {
 437         returnValue = TaskTarget::Auto;
 438     }
 439     else if (strncmp(optionString, "cpu", 3) == 0)
 440     {
 441         returnValue = TaskTarget::Cpu;
 442     }
 443     else if (strncmp(optionString, "gpu", 3) == 0)
 444     {
 445         returnValue = TaskTarget::Gpu;
 446     }
 447     else
 448     {
 449         GMX_ASSERT(false, "Option string should have been checked for sanity already");
 450     }
 451
 452     return returnValue;
 453 }
 454
 455 //! Finish run, aggregate data to print performance info.
 456 static void finish_run(FILE *fplog,
 457                        const gmx::MDLogger &mdlog,
 458                        const t_commrec *cr,
 459                        const t_inputrec *inputrec,
 460                        t_nrnb nrnb[], gmx_wallcycle_t wcycle,
 461                        gmx_walltime_accounting_t walltime_accounting,
 462                        nonbonded_verlet_t *nbv,
 463                        const gmx_pme_t *pme,
 464                        gmx_bool bWriteStat)
 465 {
 466     double  delta_t  = 0;
 467     double  nbfs     = 0, mflop = 0;
 468     double  elapsed_time,
 469             elapsed_time_over_all_ranks,
 470             elapsed_time_over_all_threads,
 471             elapsed_time_over_all_threads_over_all_ranks;
 472     /* Control whether it is valid to print a report. Only the
 473        simulation master may print, but it should not do so if the run
 474        terminated e.g. before a scheduled reset step. This is
 475        complicated by the fact that PME ranks are unaware of the
 476        reason why they were sent a pmerecvqxFINISH. To avoid
 477        communication deadlocks, we always do the communication for the
 478        report, even if we've decided not to write the report, because
 479        how long it takes to finish the run is not important when we've
 480        decided not to report on the simulation performance.
 481
 482        Further, we only report performance for dynamical integrators,
 483        because those are the only ones for which we plan to
 484        consider doing any optimizations. */
 485     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
 486
 487     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
 488     {
 489         GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
 490         printReport = false;
 491     }
 492
 493     t_nrnb                  *nrnb_tot;
 494     std::unique_ptr<t_nrnb>  nrnbTotalStorage;
 495     if (cr->nnodes > 1)
 496     {
 497         nrnbTotalStorage = std::make_unique<t_nrnb>();
 498         nrnb_tot         = nrnbTotalStorage.get();
 499 #if GMX_MPI
 500         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
 501                       cr->mpi_comm_mysim);
 502 #endif
 503     }
 504     else
 505     {
 506         nrnb_tot = nrnb;
 507     }
 508
 509     elapsed_time                  = walltime_accounting_get_time_since_reset(walltime_accounting);
 510     elapsed_time_over_all_threads = walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
 511     if (cr->nnodes > 1)
 512     {
 513 #if GMX_MPI
 514         /* reduce elapsed_time over all MPI ranks in the current simulation */
 515         MPI_Allreduce(&elapsed_time,
 516                       &elapsed_time_over_all_ranks,
 517                       1, MPI_DOUBLE, MPI_SUM,
 518                       cr->mpi_comm_mysim);
 519         elapsed_time_over_all_ranks /= cr->nnodes;
 520         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
 521          * current simulation. */
 522         MPI_Allreduce(&elapsed_time_over_all_threads,
 523                       &elapsed_time_over_all_threads_over_all_ranks,
 524                       1, MPI_DOUBLE, MPI_SUM,
 525                       cr->mpi_comm_mysim);
 526 #endif
 527     }
 528     else
 529     {
 530         elapsed_time_over_all_ranks                  = elapsed_time;
 531         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
 532     }
 533
 534     if (printReport)
 535     {
 536         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
 537     }
 538
 539     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
 540     {
 541         print_dd_statistics(cr, inputrec, fplog);
 542     }
 543
 544     /* TODO Move the responsibility for any scaling by thread counts
 545      * to the code that handled the thread region, so that there's a
 546      * mechanism to keep cycle counting working during the transition
 547      * to task parallelism. */
 548     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 549     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
 550     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
 551     auto cycle_sum(wallcycle_sum(cr, wcycle));
 552
 553     if (printReport)
 554     {
 555         auto                    nbnxn_gpu_timings = (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
 556         gmx_wallclock_gpu_pme_t pme_gpu_timings   = {};
 557
 558         if (pme_gpu_task_enabled(pme))
 559         {
 560             pme_gpu_get_timings(pme, &pme_gpu_timings);
 561         }
 562         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
 563                         elapsed_time_over_all_ranks,
 564                         wcycle, cycle_sum,
 565                         nbnxn_gpu_timings,
 566                         &pme_gpu_timings);
 567
 568         if (EI_DYNAMICS(inputrec->eI))
 569         {
 570             delta_t = inputrec->delta_t;
 571         }
 572
 573         if (fplog)
 574         {
 575             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
 576                        elapsed_time_over_all_ranks,
 577                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 578                        delta_t, nbfs, mflop);
 579         }
 580         if (bWriteStat)
 581         {
 582             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
 583                        elapsed_time_over_all_ranks,
 584                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 585                        delta_t, nbfs, mflop);
 586         }
 587     }
 588 }
 589
 590 int Mdrunner::mdrunner()
 591 {
 592     matrix                    box;
 593     t_forcerec               *fr               = nullptr;
 594     t_fcdata                 *fcd              = nullptr;
 595     real                      ewaldcoeff_q     = 0;
 596     real                      ewaldcoeff_lj    = 0;
 597     int                       nChargePerturbed = -1, nTypePerturbed = 0;
 598     gmx_wallcycle_t           wcycle;
 599     gmx_walltime_accounting_t walltime_accounting = nullptr;
 600     gmx_membed_t *            membed              = nullptr;
 601     gmx_hw_info_t            *hwinfo              = nullptr;
 602
 603     /* CAUTION: threads may be started later on in this function, so
 604        cr doesn't reflect the final parallel state right now */
 605     t_inputrec                      inputrecInstance;
 606     t_inputrec                     *inputrec = &inputrecInstance;
 607     gmx_mtop_t                      mtop;
 608
 609     bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data());
 610     bool doRerun  = mdrunOptions.rerun;
 611
 612     // Handle task-assignment related user options.
 613     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
 614                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
 615
 616     std::vector<int> userGpuTaskAssignment;
 617     try
 618     {
 619         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
 620     }
 621     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 622     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
 623     auto       pmeTarget       = findTaskTarget(pme_opt);
 624     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
 625     auto       bondedTarget    = findTaskTarget(bonded_opt);
 626     PmeRunMode pmeRunMode      = PmeRunMode::None;
 627
 628     // Here we assume that SIMMASTER(cr) does not change even after the
 629     // threads are started.
 630
 631     FILE *fplog = nullptr;
 632     // If we are appending, we don't write log output because we need
 633     // to check that the old log file matches what the checkpoint file
 634     // expects. Otherwise, we should start to write log output now if
 635     // there is a file ready for it.
 636     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
 637     {
 638         fplog = gmx_fio_getfp(logFileHandle);
 639     }
 640     gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
 641     gmx::MDLogger    mdlog(logOwner.logger());
 642
 643     // report any development features that may be enabled by environment variables
 644     reportDevelopmentFeatures(mdlog);
 645
 646     // With thread-MPI, the communicator changes after threads are
 647     // launched, so this is rebuilt for the master rank at that
 648     // time. The non-master ranks are fine to keep the one made here.
 649     PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 650     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 651
 652     gmx_print_detected_hardware(fplog, isMasterSimMasterRank(ms, MASTER(cr)), mdlog, hwinfo);
 653
 654     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
 655
 656     // Print citation requests after all software/hardware printing
 657     pleaseCiteGromacs(fplog);
 658
 659     std::unique_ptr<t_state> globalState;
 660
 661     if (SIMMASTER(cr))
 662     {
 663         /* Only the master rank has the global state */
 664         globalState = std::make_unique<t_state>();
 665
 666         /* Read (nearly) all data required for the simulation */
 667         read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), inputrec, globalState.get(), &mtop);
 668     }
 669
 670     /* Check and update the hardware options for internal consistency */
 671     checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks);
 672
 673     if (GMX_THREAD_MPI && SIMMASTER(cr))
 674     {
 675         bool useGpuForNonbonded = false;
 676         bool useGpuForPme       = false;
 677         try
 678         {
 679             // If the user specified the number of ranks, then we must
 680             // respect that, but in default mode, we need to allow for
 681             // the number of GPUs to choose the number of ranks.
 682             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 683             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
 684                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 685                     canUseGpuForNonbonded,
 686                     gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
 687                     hw_opt.nthreads_tmpi);
 688             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
 689                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
 690                     *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 691
 692         }
 693         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 694
 695         /* Determine how many thread-MPI ranks to start.
 696          *
 697          * TODO Over-writing the user-supplied value here does
 698          * prevent any possible subsequent checks from working
 699          * correctly. */
 700         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
 701                                                 &hw_opt,
 702                                                 gpuIdsToUse,
 703                                                 useGpuForNonbonded,
 704                                                 useGpuForPme,
 705                                                 inputrec, &mtop,
 706                                                 mdlog,
 707                                                 doMembed);
 708
 709         // Now start the threads for thread MPI.
 710         cr = spawnThreads(hw_opt.nthreads_tmpi);
 711         /* The main thread continues here with a new cr. We don't deallocate
 712            the old cr because other threads may still be reading it. */
 713         // TODO Both master and spawned threads call dup_tfn and
 714         // reinitialize_commrec_for_this_thread. Find a way to express
 715         // this better.
 716         physicalNodeComm = PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 717     }
 718     // END OF CAUTION: cr and physicalNodeComm are now reliable
 719
 720     if (PAR(cr))
 721     {
 722         /* now broadcast everything to the non-master nodes/threads: */
 723         init_parallel(cr, inputrec, &mtop);
 724     }
 725
 726     // Now each rank knows the inputrec that SIMMASTER read and used,
 727     // and (if applicable) cr->nnodes has been assigned the number of
 728     // thread-MPI ranks that have been chosen. The ranks can now all
 729     // run the task-deciding functions and will agree on the result
 730     // without needing to communicate.
 731     //
 732     // TODO Should we do the communication in debug mode to support
 733     // having an assertion?
 734     //
 735     // Note that these variables describe only their own node.
 736     //
 737     // Note that when bonded interactions run on a GPU they always run
 738     // alongside a nonbonded task, so do not influence task assignment
 739     // even though they affect the force calculation workload.
 740     bool useGpuForNonbonded = false;
 741     bool useGpuForPme       = false;
 742     bool useGpuForBonded    = false;
 743     try
 744     {
 745         // It's possible that there are different numbers of GPUs on
 746         // different nodes, which is the user's responsibilty to
 747         // handle. If unsuitable, we will notice that during task
 748         // assignment.
 749         bool gpusWereDetected      = hwinfo->ngpu_compatible_tot > 0;
 750         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 751         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
 752                                                                 emulateGpuNonbonded,
 753                                                                 canUseGpuForNonbonded,
 754                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
 755                                                                 gpusWereDetected);
 756         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
 757                                                     *hwinfo, *inputrec, mtop,
 758                                                     cr->nnodes, domdecOptions.numPmeRanks,
 759                                                     gpusWereDetected);
 760         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
 761         useGpuForBonded =
 762             decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
 763                                             bondedTarget, canUseGpuForBonded,
 764                                             EVDW_PME(inputrec->vdwtype),
 765                                             EEL_PME_EWALD(inputrec->coulombtype),
 766                                             domdecOptions.numPmeRanks, gpusWereDetected);
 767
 768         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
 769         if (pmeRunMode == PmeRunMode::GPU)
 770         {
 771             if (pmeFftTarget == TaskTarget::Cpu)
 772             {
 773                 pmeRunMode = PmeRunMode::Mixed;
 774             }
 775         }
 776         else if (pmeFftTarget == TaskTarget::Gpu)
 777         {
 778             gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft.");
 779         }
 780     }
 781     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 782
 783     // Build restraints.
 784     // TODO: hide restraint implementation details from Mdrunner.
 785     // There is nothing unique about restraints at this point as far as the
 786     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
 787     // factory functions from the SimulationContext on which to call mdModules_->add().
 788     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
 789     for (auto && restraint : restraintManager_->getRestraints())
 790     {
 791         auto module = RestraintMDModule::create(restraint,
 792                                                 restraint->sites());
 793         mdModules_->add(std::move(module));
 794     }
 795
 796     // TODO: Error handling
 797     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 798
 799     if (fplog != nullptr)
 800     {
 801         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 802         fprintf(fplog, "\n");
 803     }
 804
 805     if (SIMMASTER(cr))
 806     {
 807         /* In rerun, set velocities to zero if present */
 808         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
 809         {
 810             // rerun does not use velocities
 811             GMX_LOG(mdlog.info).asParagraph().appendText(
 812                     "Rerun trajectory contains velocities. Rerun does only evaluate "
 813                     "potential energy and forces. The velocities will be ignored.");
 814             for (int i = 0; i < globalState->natoms; i++)
 815             {
 816                 clear_rvec(globalState->v[i]);
 817             }
 818             globalState->flags &= ~(1 << estV);
 819         }
 820
 821         /* now make sure the state is initialized and propagated */
 822         set_state_entries(globalState.get(), inputrec);
 823     }
 824
 825     /* NM and TPI parallelize over force/energy calculations, not atoms,
 826      * so we need to initialize and broadcast the global state.
 827      */
 828     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
 829     {
 830         if (!MASTER(cr))
 831         {
 832             globalState = std::make_unique<t_state>();
 833         }
 834         broadcastStateWithoutDynamics(cr, globalState.get());
 835     }
 836
 837     /* A parallel command line option consistency check that we can
 838        only do after any threads have started. */
 839     if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 ||
 840                      domdecOptions.numCells[YY] > 1 ||
 841                      domdecOptions.numCells[ZZ] > 1 ||
 842                      domdecOptions.numPmeRanks > 0))
 843     {
 844         gmx_fatal(FARGS,
 845                   "The -dd or -npme option request a parallel simulation, "
 846 #if !GMX_MPI
 847                   "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
 848 #else
 849 #if GMX_THREAD_MPI
 850                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
 851 #else
 852                   "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
 853 #endif
 854 #endif
 855     }
 856
 857     if (doRerun &&
 858         (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 859     {
 860         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 861     }
 862
 863     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
 864     {
 865         if (domdecOptions.numPmeRanks > 0)
 866         {
 867             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
 868                                  "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
 869         }
 870
 871         domdecOptions.numPmeRanks = 0;
 872     }
 873
 874     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
 875     {
 876         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
 877          * improve performance with many threads per GPU, since our OpenMP
 878          * scaling is bad, but it's difficult to automate the setup.
 879          */
 880         domdecOptions.numPmeRanks = 0;
 881     }
 882     if (useGpuForPme)
 883     {
 884         if (domdecOptions.numPmeRanks < 0)
 885         {
 886             domdecOptions.numPmeRanks = 0;
 887             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
 888         }
 889         else
 890         {
 891             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
 892         }
 893     }
 894
 895 #if GMX_FAHCORE
 896     if (MASTER(cr))
 897     {
 898         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
 899     }
 900 #endif
 901
 902     /* NMR restraints must be initialized before load_checkpoint,
 903      * since with time averaging the history is added to t_state.
 904      * For proper consistency check we therefore need to extend
 905      * t_state here.
 906      * So the PME-only nodes (if present) will also initialize
 907      * the distance restraints.
 908      */
 909     snew(fcd, 1);
 910
 911     /* This needs to be called before read_checkpoint to extend the state */
 912     init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
 913
 914     init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
 915
 916     auto                 deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
 917
 918     ObservablesHistory   observablesHistory = {};
 919
 920     if (startingBehavior != StartingBehavior::NewSimulation)
 921     {
 922         /* Check if checkpoint file exists before doing continuation.
 923          * This way we can use identical input options for the first and subsequent runs...
 924          */
 925         if (mdrunOptions.numStepsCommandline > -2)
 926         {
 927             /* Temporarily set the number of steps to unmlimited to avoid
 928              * triggering the nsteps check in load_checkpoint().
 929              * This hack will go away soon when the -nsteps option is removed.
 930              */
 931             inputrec->nsteps = -1;
 932         }
 933
 934         load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
 935                         logFileHandle,
 936                         cr, domdecOptions.numCells,
 937                         inputrec, globalState.get(),
 938                         &observablesHistory,
 939                         mdrunOptions.reproducible);
 940
 941         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
 942         {
 943             // Now we can start normal logging to the truncated log file.
 944             fplog    = gmx_fio_getfp(logFileHandle);
 945             prepareLogAppending(fplog);
 946             logOwner = buildLogger(fplog, cr);
 947             mdlog    = logOwner.logger();
 948         }
 949     }
 950
 951     if (mdrunOptions.numStepsCommandline > -2)
 952     {
 953         GMX_LOG(mdlog.info).asParagraph().
 954             appendText("The -nsteps functionality is deprecated, and may be removed in a future version. "
 955                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field.");
 956     }
 957     /* override nsteps with value set on the commamdline */
 958     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
 959
 960     if (SIMMASTER(cr))
 961     {
 962         copy_mat(globalState->box, box);
 963     }
 964
 965     if (PAR(cr))
 966     {
 967         gmx_bcast(sizeof(box), box, cr);
 968     }
 969
 970     if (inputrec->cutoff_scheme != ecutsVERLET)
 971     {
 972         gmx_fatal(FARGS, "This group-scheme .tpr file can no longer be run by mdrun. Please update to the Verlet scheme, or use an earlier version of GROMACS if necessary.");
 973     }
 974     /* Update rlist and nstlist. */
 975     prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
 976                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
 977
 978     LocalAtomSetManager atomSets;
 979
 980     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
 981                      inputrec->eI == eiNM))
 982     {
 983         cr->dd = init_domain_decomposition(mdlog, cr, domdecOptions, mdrunOptions,
 984                                            &mtop, inputrec,
 985                                            box, positionsFromStatePointer(globalState.get()),
 986                                            &atomSets);
 987         // Note that local state still does not exist yet.
 988     }
 989     else
 990     {
 991         /* PME, if used, is done on all nodes with 1D decomposition */
 992         cr->npmenodes = 0;
 993         cr->duty      = (DUTY_PP | DUTY_PME);
 994
 995         if (inputrec->ePBC == epbcSCREW)
 996         {
 997             gmx_fatal(FARGS,
 998                       "pbc=screw is only implemented with domain decomposition");
 999         }
1000     }
1001
1002     if (PAR(cr))
1003     {
1004         /* After possible communicator splitting in make_dd_communicators.
1005          * we can set up the intra/inter node communication.
1006          */
1007         gmx_setup_nodecomm(fplog, cr);
1008     }
1009
1010 #if GMX_MPI
1011     if (isMultiSim(ms))
1012     {
1013         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1014                 "This is simulation %d out of %d running as a composite GROMACS\n"
1015                 "multi-simulation job. Setup for this simulation:\n",
1016                 ms->sim, ms->nsim);
1017     }
1018     GMX_LOG(mdlog.warning).appendTextFormatted(
1019             "Using %d MPI %s\n",
1020             cr->nnodes,
1021 #if GMX_THREAD_MPI
1022             cr->nnodes == 1 ? "thread" : "threads"
1023 #else
1024             cr->nnodes == 1 ? "process" : "processes"
1025 #endif
1026             );
1027     fflush(stderr);
1028 #endif
1029
1030     // If mdrun -pin auto honors any affinity setting that already
1031     // exists. If so, it is nice to provide feedback about whether
1032     // that existing affinity setting was from OpenMP or something
1033     // else, so we run this code both before and after we initialize
1034     // the OpenMP support.
1035     gmx_check_thread_affinity_set(mdlog,
1036                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
1037     /* Check and update the number of OpenMP threads requested */
1038     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1039                                             pmeRunMode, mtop);
1040
1041     gmx_omp_nthreads_init(mdlog, cr,
1042                           hwinfo->nthreads_hw_avail,
1043                           physicalNodeComm.size_,
1044                           hw_opt.nthreads_omp,
1045                           hw_opt.nthreads_omp_pme,
1046                           !thisRankHasDuty(cr, DUTY_PP));
1047
1048     // Enable FP exception detection, but not in
1049     // Release mode and not for compilers with known buggy FP
1050     // exception support (clang with any optimization) or suspected
1051     // buggy FP exception support (gcc 7.* with optimization).
1052 #if !defined NDEBUG && \
1053     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1054     && defined __OPTIMIZE__)
1055     const bool bEnableFPE = true;
1056 #else
1057     const bool bEnableFPE = false;
1058 #endif
1059     //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1060     if (bEnableFPE)
1061     {
1062         gmx_feenableexcept();
1063     }
1064
1065     // Build a data structure that expresses which kinds of non-bonded
1066     // task are handled by this rank.
1067     //
1068     // TODO Later, this might become a loop over all registered modules
1069     // relevant to the mdp inputs, to find those that have such tasks.
1070     //
1071     // TODO This could move before init_domain_decomposition() as part
1072     // of refactoring that separates the responsibility for duty
1073     // assignment from setup for communication between tasks, and
1074     // setup for tasks handled with a domain (ie including short-ranged
1075     // tasks, bonded tasks, etc.).
1076     //
1077     // Note that in general useGpuForNonbonded, etc. can have a value
1078     // that is inconsistent with the presence of actual GPUs on any
1079     // rank, and that is not known to be a problem until the
1080     // duty of the ranks on a node become known.
1081     //
1082     // TODO Later we might need the concept of computeTasksOnThisRank,
1083     // from which we construct gpuTasksOnThisRank.
1084     //
1085     // Currently the DD code assigns duty to ranks that can
1086     // include PP work that currently can be executed on a single
1087     // GPU, if present and compatible.  This has to be coordinated
1088     // across PP ranks on a node, with possible multiple devices
1089     // or sharing devices on a node, either from the user
1090     // selection, or automatically.
1091     auto                 haveGpus = !gpuIdsToUse.empty();
1092     std::vector<GpuTask> gpuTasksOnThisRank;
1093     if (thisRankHasDuty(cr, DUTY_PP))
1094     {
1095         if (useGpuForNonbonded)
1096         {
1097             // Note that any bonded tasks on a GPU always accompany a
1098             // non-bonded task.
1099             if (haveGpus)
1100             {
1101                 gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
1102             }
1103             else if (nonbondedTarget == TaskTarget::Gpu)
1104             {
1105                 gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because no GPU is detected.");
1106             }
1107             else if (bondedTarget == TaskTarget::Gpu)
1108             {
1109                 gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because no GPU is detected.");
1110             }
1111         }
1112     }
1113     // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
1114     if (EEL_PME(inputrec->coulombtype) && (thisRankHasDuty(cr, DUTY_PME)))
1115     {
1116         if (useGpuForPme)
1117         {
1118             if (haveGpus)
1119             {
1120                 gpuTasksOnThisRank.push_back(GpuTask::Pme);
1121             }
1122             else if (pmeTarget == TaskTarget::Gpu)
1123             {
1124                 gmx_fatal(FARGS, "Cannot run PME on a GPU because no GPU is detected.");
1125             }
1126         }
1127     }
1128
1129     GpuTaskAssignment gpuTaskAssignment;
1130     try
1131     {
1132         // Produce the task assignment for this rank.
1133         gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
1134                                               mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank,
1135                                               useGpuForBonded, pmeRunMode);
1136     }
1137     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1138
1139     /* Prevent other ranks from continuing after an issue was found
1140      * and reported as a fatal error.
1141      *
1142      * TODO This function implements a barrier so that MPI runtimes
1143      * can organize an orderly shutdown if one of the ranks has had to
1144      * issue a fatal error in various code already run. When we have
1145      * MPI-aware error handling and reporting, this should be
1146      * improved. */
1147 #if GMX_MPI
1148     if (PAR(cr))
1149     {
1150         MPI_Barrier(cr->mpi_comm_mysim);
1151     }
1152     if (isMultiSim(ms))
1153     {
1154         if (SIMMASTER(cr))
1155         {
1156             MPI_Barrier(ms->mpi_comm_masters);
1157         }
1158         /* We need another barrier to prevent non-master ranks from contiuing
1159          * when an error occured in a different simulation.
1160          */
1161         MPI_Barrier(cr->mpi_comm_mysim);
1162     }
1163 #endif
1164
1165     /* Now that we know the setup is consistent, check for efficiency */
1166     check_resource_division_efficiency(hwinfo, !gpuTaskAssignment.empty(), mdrunOptions.ntompOptionIsSet,
1167                                        cr, mdlog);
1168
1169     gmx_device_info_t *nonbondedDeviceInfo = nullptr;
1170
1171     if (thisRankHasDuty(cr, DUTY_PP))
1172     {
1173         // This works because only one task of each type is currently permitted.
1174         auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
1175                                              hasTaskType<GpuTask::Nonbonded>);
1176         if (nbGpuTaskMapping != gpuTaskAssignment.end())
1177         {
1178             int nonbondedDeviceId = nbGpuTaskMapping->deviceId_;
1179             nonbondedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, nonbondedDeviceId);
1180             init_gpu(nonbondedDeviceInfo);
1181
1182             if (DOMAINDECOMP(cr))
1183             {
1184                 /* When we share GPUs over ranks, we need to know this for the DLB */
1185                 dd_setup_dlb_resource_sharing(cr, nonbondedDeviceId);
1186             }
1187
1188         }
1189     }
1190
1191     std::unique_ptr<ClfftInitializer> initializedClfftLibrary;
1192
1193     gmx_device_info_t                *pmeDeviceInfo = nullptr;
1194     // Later, this program could contain kernels that might be later
1195     // re-used as auto-tuning progresses, or subsequent simulations
1196     // are invoked.
1197     PmeGpuProgramStorage pmeGpuProgram;
1198     // This works because only one task of each type is currently permitted.
1199     auto                 pmeGpuTaskMapping     = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasTaskType<GpuTask::Pme>);
1200     const bool           thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
1201     if (thisRankHasPmeGpuTask)
1202     {
1203         pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_);
1204         init_gpu(pmeDeviceInfo);
1205         pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
1206         // TODO It would be nice to move this logic into the factory
1207         // function. See Redmine #2535.
1208         bool isMasterThread = !GMX_THREAD_MPI || MASTER(cr);
1209         if (pmeRunMode == PmeRunMode::GPU && !initializedClfftLibrary && isMasterThread)
1210         {
1211             initializedClfftLibrary = initializeClfftLibrary();
1212         }
1213     }
1214
1215     /* getting number of PP/PME threads on this MPI / tMPI rank.
1216        PME: env variable should be read only on one node to make sure it is
1217        identical everywhere;
1218      */
1219     const int numThreadsOnThisRank =
1220         thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) : gmx_omp_nthreads_get(emntPME);
1221     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
1222                                   *hwinfo->hardwareTopology,
1223                                   physicalNodeComm, mdlog);
1224
1225     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1226     {
1227         /* Before setting affinity, check whether the affinity has changed
1228          * - which indicates that probably the OpenMP library has changed it
1229          * since we first checked).
1230          */
1231         gmx_check_thread_affinity_set(mdlog,
1232                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1233
1234         int numThreadsOnThisNode, intraNodeThreadOffset;
1235         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1236                                  &intraNodeThreadOffset);
1237
1238         /* Set the CPU affinity */
1239         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
1240                                 numThreadsOnThisRank, numThreadsOnThisNode,
1241                                 intraNodeThreadOffset, nullptr);
1242     }
1243
1244     if (mdrunOptions.timingOptions.resetStep > -1)
1245     {
1246         GMX_LOG(mdlog.info).asParagraph().
1247             appendText("The -resetstep functionality is deprecated, and may be removed in a future version.");
1248     }
1249     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1250
1251     if (PAR(cr))
1252     {
1253         /* Master synchronizes its value of reset_counters with all nodes
1254          * including PME only nodes */
1255         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1256         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
1257         wcycle_set_reset_counters(wcycle, reset_counters);
1258     }
1259
1260     // Membrane embedding must be initialized before we call init_forcerec()
1261     if (doMembed)
1262     {
1263         if (MASTER(cr))
1264         {
1265             fprintf(stderr, "Initializing membed");
1266         }
1267         /* Note that membed cannot work in parallel because mtop is
1268          * changed here. Fix this if we ever want to make it run with
1269          * multiple ranks. */
1270         membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr,
1271                              &mdrunOptions
1272                                  .checkpointOptions.period);
1273     }
1274
1275     std::unique_ptr<MDAtoms>     mdAtoms;
1276     std::unique_ptr<gmx_vsite_t> vsite;
1277
1278     t_nrnb nrnb;
1279     if (thisRankHasDuty(cr, DUTY_PP))
1280     {
1281         /* Initiate forcerecord */
1282         fr                 = new t_forcerec;
1283         fr->forceProviders = mdModules_->initForceProviders();
1284         init_forcerec(fplog, mdlog, fr, fcd,
1285                       inputrec, &mtop, cr, box,
1286                       opt2fn("-table", filenames.size(), filenames.data()),
1287                       opt2fn("-tablep", filenames.size(), filenames.data()),
1288                       opt2fns("-tableb", filenames.size(), filenames.data()),
1289                       *hwinfo, nonbondedDeviceInfo,
1290                       useGpuForBonded,
1291                       FALSE,
1292                       pforce,
1293                       wcycle);
1294
1295         /* Initialize the mdAtoms structure.
1296          * mdAtoms is not filled with atom data,
1297          * as this can not be done now with domain decomposition.
1298          */
1299         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1300         if (globalState && thisRankHasPmeGpuTask)
1301         {
1302             // The pinning of coordinates in the global state object works, because we only use
1303             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1304             // points to the global state object without DD.
1305             // FIXME: MD and EM separately set up the local state - this should happen in the same function,
1306             // which should also perform the pinning.
1307             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1308         }
1309
1310         /* Initialize the virtual site communication */
1311         vsite = initVsite(mtop, cr);
1312
1313         calc_shifts(box, fr->shift_vec);
1314
1315         /* With periodic molecules the charge groups should be whole at start up
1316          * and the virtual sites should not be far from their proper positions.
1317          */
1318         if (!inputrec->bContinuation && MASTER(cr) &&
1319             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
1320         {
1321             /* Make molecules whole at start of run */
1322             if (fr->ePBC != epbcNONE)
1323             {
1324                 do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
1325             }
1326             if (vsite)
1327             {
1328                 /* Correct initial vsite positions are required
1329                  * for the initial distribution in the domain decomposition
1330                  * and for the initial shell prediction.
1331                  */
1332                 constructVsitesGlobal(mtop, globalState->x);
1333             }
1334         }
1335
1336         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1337         {
1338             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1339             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1340         }
1341     }
1342     else
1343     {
1344         /* This is a PME only node */
1345
1346         GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized");
1347
1348         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1349         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1350     }
1351
1352     gmx_pme_t *sepPmeData = nullptr;
1353     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1354     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
1355     gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
1356
1357     /* Initiate PME if necessary,
1358      * either on all nodes or on dedicated PME nodes only. */
1359     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1360     {
1361         if (mdAtoms && mdAtoms->mdatoms())
1362         {
1363             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1364             if (EVDW_PME(inputrec->vdwtype))
1365             {
1366                 nTypePerturbed   = mdAtoms->mdatoms()->nTypePerturbed;
1367             }
1368         }
1369         if (cr->npmenodes > 0)
1370         {
1371             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1372             gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
1373             gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
1374         }
1375
1376         if (thisRankHasDuty(cr, DUTY_PME))
1377         {
1378             try
1379             {
1380                 pmedata = gmx_pme_init(cr,
1381                                        getNumPmeDomains(cr->dd),
1382                                        inputrec,
1383                                        nChargePerturbed != 0, nTypePerturbed != 0,
1384                                        mdrunOptions.reproducible,
1385                                        ewaldcoeff_q, ewaldcoeff_lj,
1386                                        gmx_omp_nthreads_get(emntPME),
1387                                        pmeRunMode, nullptr,
1388                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
1389             }
1390             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1391         }
1392     }
1393
1394
1395     if (EI_DYNAMICS(inputrec->eI))
1396     {
1397         /* Turn on signal handling on all nodes */
1398         /*
1399          * (A user signal from the PME nodes (if any)
1400          * is communicated to the PP nodes.
1401          */
1402         signal_handler_install();
1403     }
1404
1405     pull_t *pull_work = nullptr;
1406     if (thisRankHasDuty(cr, DUTY_PP))
1407     {
1408         /* Assumes uniform use of the number of OpenMP threads */
1409         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1410
1411         if (inputrec->bPull)
1412         {
1413             /* Initialize pull code */
1414             pull_work =
1415                 init_pull(fplog, inputrec->pull, inputrec,
1416                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
1417             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1418             {
1419                 initPullHistory(pull_work, &observablesHistory);
1420             }
1421             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1422             {
1423                 init_pull_output_files(pull_work,
1424                                        filenames.size(), filenames.data(), oenv,
1425                                        startingBehavior);
1426             }
1427         }
1428
1429         std::unique_ptr<EnforcedRotation> enforcedRotation;
1430         if (inputrec->bRot)
1431         {
1432             /* Initialize enforced rotation code */
1433             enforcedRotation = init_rot(fplog,
1434                                         inputrec,
1435                                         filenames.size(),
1436                                         filenames.data(),
1437                                         cr,
1438                                         &atomSets,
1439                                         globalState.get(),
1440                                         &mtop,
1441                                         oenv,
1442                                         mdrunOptions,
1443                                         startingBehavior);
1444         }
1445
1446         t_swap *swap = nullptr;
1447         if (inputrec->eSwapCoords != eswapNO)
1448         {
1449             /* Initialize ion swapping code */
1450             swap = init_swapcoords(fplog, inputrec,
1451                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1452                                    &mtop, globalState.get(), &observablesHistory,
1453                                    cr, &atomSets, oenv, mdrunOptions,
1454                                    startingBehavior);
1455         }
1456
1457         /* Let makeConstraints know whether we have essential dynamics constraints.
1458          * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint
1459          */
1460         bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr
1461                                     || observablesHistory.edsamHistory);
1462         auto constr              = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics,
1463                                                    fplog, *mdAtoms->mdatoms(),
1464                                                    cr, ms, &nrnb, wcycle, fr->bMolPBC);
1465
1466         /* Energy terms and groups */
1467         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), inputrec->fepvals->n_lambda);
1468
1469         /* Set up interactive MD (IMD) */
1470         auto imdSession = makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
1471                                          MASTER(cr) ? globalState->x.rvec_array() : nullptr,
1472                                          filenames.size(), filenames.data(), oenv, mdrunOptions.imdOptions,
1473                                          startingBehavior);
1474
1475         if (DOMAINDECOMP(cr))
1476         {
1477             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1478             /* This call is not included in init_domain_decomposition mainly
1479              * because fr->cginfo_mb is set later.
1480              */
1481             dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
1482                             domdecOptions.checkBondedInteractions,
1483                             fr->cginfo_mb);
1484         }
1485
1486         // TODO This is not the right place to manage the lifetime of
1487         // this data structure, but currently it's the easiest way to
1488         // make it work. Later, it should probably be made/updated
1489         // after the workload for the lifetime of a PP domain is
1490         // understood.
1491         PpForceWorkload ppForceWorkload;
1492
1493         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1494         /* Now do whatever the user wants us to do (how flexible...) */
1495         LegacySimulator simulator {
1496             fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(),
1497             oenv,
1498             mdrunOptions,
1499             startingBehavior,
1500             vsite.get(), constr.get(),
1501             enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1502             deform.get(),
1503             mdModules_->outputProvider(),
1504             inputrec, imdSession.get(), pull_work, swap, &mtop,
1505             fcd,
1506             globalState.get(),
1507             &observablesHistory,
1508             mdAtoms.get(), &nrnb, wcycle, fr,
1509             &enerd,
1510             &ppForceWorkload,
1511             replExParams,
1512             membed,
1513             walltime_accounting,
1514             std::move(stopHandlerBuilder_)
1515         };
1516         simulator.run(inputrec->eI, doRerun);
1517
1518         if (inputrec->bPull)
1519         {
1520             finish_pull(pull_work);
1521         }
1522         finish_swapcoords(swap);
1523     }
1524     else
1525     {
1526         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1527         /* do PME only */
1528         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1529         gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
1530     }
1531
1532     wallcycle_stop(wcycle, ewcRUN);
1533
1534     /* Finish up, write some stuff
1535      * if rerunMD, don't write last frame again
1536      */
1537     finish_run(fplog, mdlog, cr,
1538                inputrec, &nrnb, wcycle, walltime_accounting,
1539                fr ? fr->nbv.get() : nullptr,
1540                pmedata,
1541                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1542
1543     // clean up cycle counter
1544     wallcycle_destroy(wcycle);
1545
1546 // Free PME data
1547     if (pmedata)
1548     {
1549         gmx_pme_destroy(pmedata);
1550         pmedata = nullptr;
1551     }
1552
1553     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1554     // before we destroy the GPU context(s) in free_gpu_resources().
1555     // Pinned buffers are associated with contexts in CUDA.
1556     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1557     mdAtoms.reset(nullptr);
1558     globalState.reset(nullptr);
1559     mdModules_.reset(nullptr);   // destruct force providers here as they might also use the GPU
1560
1561     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
1562     free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
1563     free_gpu(nonbondedDeviceInfo);
1564     free_gpu(pmeDeviceInfo);
1565     done_forcerec(fr, mtop.molblock.size());
1566     sfree(fcd);
1567
1568     if (doMembed)
1569     {
1570         free_membed(membed);
1571     }
1572
1573     /* Does what it says */
1574     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1575     walltime_accounting_destroy(walltime_accounting);
1576
1577     // Ensure log file content is written
1578     if (logFileHandle)
1579     {
1580         gmx_fio_flush(logFileHandle);
1581     }
1582
1583     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1584      * exceptions were enabled before function was called. */
1585     if (bEnableFPE)
1586     {
1587         gmx_fedisableexcept();
1588     }
1589
1590     auto rc = static_cast<int>(gmx_get_stop_condition());
1591
1592 #if GMX_THREAD_MPI
1593     /* we need to join all threads. The sub-threads join when they
1594        exit this function, but the master thread needs to be told to
1595        wait for that. */
1596     if (PAR(cr) && MASTER(cr))
1597     {
1598         tMPI_Finalize();
1599     }
1600     //TODO free commrec in MPI simulations
1601     done_commrec(cr);
1602 #endif
1603     return rc;
1604 }
1605
1606 Mdrunner::~Mdrunner()
1607 {
1608     // Clean up of the Manager.
1609     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1610     // but okay as long as threads synchronize some time before adding or accessing
1611     // a new set of restraints.
1612     if (restraintManager_)
1613     {
1614         restraintManager_->clear();
1615         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1616                    "restraints added during runner life time should be cleared at runner destruction.");
1617     }
1618 };
1619
1620 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller,
1621                             std::string                               name)
1622 {
1623     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1624     // Not sure if this should be logged through the md logger or something else,
1625     // but it is helpful to have some sort of INFO level message sent somewhere.
1626     //    std::cout << "Registering restraint named " << name << std::endl;
1627
1628     // When multiple restraints are used, it may be wasteful to register them separately.
1629     // Maybe instead register an entire Restraint Manager as a force provider.
1630     restraintManager_->addToSpec(std::move(puller),
1631                                  std::move(name));
1632 }
1633
1634 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules)
1635     : mdModules_(std::move(mdModules))
1636 {
1637 }
1638
1639 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
1640
1641 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
1642 Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
1643
1644 class Mdrunner::BuilderImplementation
1645 {
1646     public:
1647         BuilderImplementation() = delete;
1648         BuilderImplementation(std::unique_ptr<MDModules> mdModules,
1649                               SimulationContext        * context);
1650         ~BuilderImplementation();
1651
1652         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
1653                                                     real                forceWarningThreshold,
1654                                                     StartingBehavior    startingBehavior);
1655
1656         void addDomdec(const DomdecOptions &options);
1657
1658         void addVerletList(int nstlist);
1659
1660         void addReplicaExchange(const ReplicaExchangeParameters &params);
1661
1662         void addMultiSim(gmx_multisim_t* multisim);
1663
1664         void addNonBonded(const char* nbpu_opt);
1665
1666         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1667
1668         void addBondedTaskAssignment(const char* bonded_opt);
1669
1670         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
1671
1672         void addFilenames(ArrayRef <const t_filenm> filenames);
1673
1674         void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1675
1676         void addLogFile(t_fileio *logFileHandle);
1677
1678         void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1679
1680         Mdrunner build();
1681
1682     private:
1683
1684         // Default parameters copied from runner.h
1685         // \todo Clarify source(s) of default parameters.
1686
1687         const char* nbpu_opt_    = nullptr;
1688         const char* pme_opt_     = nullptr;
1689         const char* pme_fft_opt_ = nullptr;
1690         const char *bonded_opt_  = nullptr;
1691
1692         MdrunOptions                          mdrunOptions_;
1693
1694         DomdecOptions                         domdecOptions_;
1695
1696         ReplicaExchangeParameters             replicaExchangeParameters_;
1697
1698         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1699         int         nstlist_ = 0;
1700
1701         //! Non-owning multisim communicator handle.
1702         std::unique_ptr<gmx_multisim_t*>      multisim_ = nullptr;
1703
1704         //! Print a warning if any force is larger than this (in kJ/mol nm).
1705         real forceWarningThreshold_ = -1;
1706
1707         //! Whether the simulation will start afresh, or restart with/without appending.
1708         StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1709
1710         //! The modules that comprise the functionality of mdrun.
1711         std::unique_ptr<MDModules> mdModules_;
1712
1713         /*! \brief  Non-owning pointer to SimulationContext (owned and managed by client)
1714          *
1715          * \internal
1716          * \todo Establish robust protocol to make sure resources remain valid.
1717          * SimulationContext will likely be separated into multiple layers for
1718          * different levels of access and different phases of execution. Ref
1719          * https://redmine.gromacs.org/issues/2375
1720          * https://redmine.gromacs.org/issues/2587
1721          */
1722         SimulationContext* context_ = nullptr;
1723
1724         //! \brief Parallelism information.
1725         gmx_hw_opt_t hardwareOptions_;
1726
1727         //! filename options for simulation.
1728         ArrayRef<const t_filenm> filenames_;
1729
1730         /*! \brief Handle to output environment.
1731          *
1732          * \todo gmx_output_env_t needs lifetime management.
1733          */
1734         gmx_output_env_t*    outputEnvironment_ = nullptr;
1735
1736         /*! \brief Non-owning handle to MD log file.
1737          *
1738          * \todo Context should own output facilities for client.
1739          * \todo Improve log file handle management.
1740          * \internal
1741          * Code managing the FILE* relies on the ability to set it to
1742          * nullptr to check whether the filehandle is valid.
1743          */
1744         t_fileio* logFileHandle_ = nullptr;
1745
1746         /*!
1747          * \brief Builder for simulation stop signal handler.
1748          */
1749         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1750 };
1751
1752 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules,
1753                                                        SimulationContext        * context) :
1754     mdModules_(std::move(mdModules)),
1755     context_(context)
1756 {
1757     GMX_ASSERT(context_, "Bug found. It should not be possible to construct builder without a valid context.");
1758 }
1759
1760 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
1761
1762 Mdrunner::BuilderImplementation &
1763 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions    &options,
1764                                                       const real             forceWarningThreshold,
1765                                                       const StartingBehavior startingBehavior)
1766 {
1767     mdrunOptions_          = options;
1768     forceWarningThreshold_ = forceWarningThreshold;
1769     startingBehavior_      = startingBehavior;
1770     return *this;
1771 }
1772
1773 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options)
1774 {
1775     domdecOptions_ = options;
1776 }
1777
1778 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
1779 {
1780     nstlist_ = nstlist;
1781 }
1782
1783 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters &params)
1784 {
1785     replicaExchangeParameters_ = params;
1786 }
1787
1788 void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim)
1789 {
1790     multisim_ = std::make_unique<gmx_multisim_t*>(multisim);
1791 }
1792
1793 Mdrunner Mdrunner::BuilderImplementation::build()
1794 {
1795     auto newRunner = Mdrunner(std::move(mdModules_));
1796
1797     GMX_ASSERT(context_, "Bug found. It should not be possible to call build() without a valid context.");
1798
1799     newRunner.mdrunOptions          = mdrunOptions_;
1800     newRunner.pforce                = forceWarningThreshold_;
1801     newRunner.startingBehavior      = startingBehavior_;
1802     newRunner.domdecOptions         = domdecOptions_;
1803
1804     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
1805     newRunner.hw_opt          = hardwareOptions_;
1806
1807     // No invariant to check. This parameter exists to optionally override other behavior.
1808     newRunner.nstlist_cmdline = nstlist_;
1809
1810     newRunner.replExParams    = replicaExchangeParameters_;
1811
1812     newRunner.filenames = filenames_;
1813
1814     GMX_ASSERT(context_->communicationRecord_, "SimulationContext communications not initialized.");
1815     newRunner.cr = context_->communicationRecord_;
1816
1817     if (multisim_)
1818     {
1819         // nullptr is a valid value for the multisim handle, so we don't check the pointed-to pointer.
1820         newRunner.ms = *multisim_;
1821     }
1822     else
1823     {
1824         GMX_THROW(gmx::APIError("MdrunnerBuilder::addMultiSim() is required before build()"));
1825     }
1826
1827     // \todo Clarify ownership and lifetime management for gmx_output_env_t
1828     // \todo Update sanity checking when output environment has clearly specified invariants.
1829     // Initialization and default values for oenv are not well specified in the current version.
1830     if (outputEnvironment_)
1831     {
1832         newRunner.oenv  = outputEnvironment_;
1833     }
1834     else
1835     {
1836         GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()"));
1837     }
1838
1839     newRunner.logFileHandle = logFileHandle_;
1840
1841     if (nbpu_opt_)
1842     {
1843         newRunner.nbpu_opt    = nbpu_opt_;
1844     }
1845     else
1846     {
1847         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
1848     }
1849
1850     if (pme_opt_ && pme_fft_opt_)
1851     {
1852         newRunner.pme_opt     = pme_opt_;
1853         newRunner.pme_fft_opt = pme_fft_opt_;
1854     }
1855     else
1856     {
1857         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
1858     }
1859
1860     if (bonded_opt_)
1861     {
1862         newRunner.bonded_opt = bonded_opt_;
1863     }
1864     else
1865     {
1866         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
1867     }
1868
1869     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
1870
1871     if (stopHandlerBuilder_)
1872     {
1873         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
1874     }
1875     else
1876     {
1877         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
1878     }
1879
1880     return newRunner;
1881 }
1882
1883 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
1884 {
1885     nbpu_opt_ = nbpu_opt;
1886 }
1887
1888 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
1889                                              const char* pme_fft_opt)
1890 {
1891     pme_opt_     = pme_opt;
1892     pme_fft_opt_ = pme_fft_opt;
1893 }
1894
1895 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
1896 {
1897     bonded_opt_ = bonded_opt;
1898 }
1899
1900 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1901 {
1902     hardwareOptions_ = hardwareOptions;
1903 }
1904
1905 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
1906 {
1907     filenames_ = filenames;
1908 }
1909
1910 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1911 {
1912     outputEnvironment_ = outputEnvironment;
1913 }
1914
1915 void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle)
1916 {
1917     logFileHandle_ = logFileHandle;
1918 }
1919
1920 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
1921 {
1922     stopHandlerBuilder_ = std::move(builder);
1923 }
1924
1925 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
1926                                  compat::not_null<SimulationContext*> context) :
1927     impl_ {std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context)}
1928 {
1929 }
1930
1931 MdrunnerBuilder::~MdrunnerBuilder() = default;
1932
1933 MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions    &options,
1934                                                       real                   forceWarningThreshold,
1935                                                       const StartingBehavior startingBehavior)
1936 {
1937     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
1938     return *this;
1939 }
1940
1941 MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options)
1942 {
1943     impl_->addDomdec(options);
1944     return *this;
1945 }
1946
1947 MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist)
1948 {
1949     impl_->addVerletList(nstlist);
1950     return *this;
1951 }
1952
1953 MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters &params)
1954 {
1955     impl_->addReplicaExchange(params);
1956     return *this;
1957 }
1958
1959 MdrunnerBuilder &MdrunnerBuilder::addMultiSim(gmx_multisim_t* multisim)
1960 {
1961     impl_->addMultiSim(multisim);
1962     return *this;
1963 }
1964
1965 MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
1966 {
1967     impl_->addNonBonded(nbpu_opt);
1968     return *this;
1969 }
1970
1971 MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
1972                                                     const char* pme_fft_opt)
1973 {
1974     // The builder method may become more general in the future, but in this version,
1975     // parameters for PME electrostatics are both required and the only parameters
1976     // available.
1977     if (pme_opt && pme_fft_opt)
1978     {
1979         impl_->addPME(pme_opt, pme_fft_opt);
1980     }
1981     else
1982     {
1983         GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
1984     }
1985     return *this;
1986 }
1987
1988 MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
1989 {
1990     impl_->addBondedTaskAssignment(bonded_opt);
1991     return *this;
1992 }
1993
1994 Mdrunner MdrunnerBuilder::build()
1995 {
1996     return impl_->build();
1997 }
1998
1999 MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
2000 {
2001     impl_->addHardwareOptions(hardwareOptions);
2002     return *this;
2003 }
2004
2005 MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2006 {
2007     impl_->addFilenames(filenames);
2008     return *this;
2009 }
2010
2011 MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2012 {
2013     impl_->addOutputEnvironment(outputEnvironment);
2014     return *this;
2015 }
2016
2017 MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle)
2018 {
2019     impl_->addLogFile(logFileHandle);
2020     return *this;
2021 }
2022
2023 MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2024 {
2025     impl_->addStopHandlerBuilder(std::move(builder));
2026     return *this;
2027 }
2028
2029 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default;
2030
2031 MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default;
2032
2033 } // namespace gmx