src/gromacs/mdrun/runner.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the MD runner routine calling all integrators.
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include "runner.h"
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cinttypes>
  52 #include <csignal>
  53 #include <cstdlib>
  54 #include <cstring>
  55
  56 #include <algorithm>
  57 #include <memory>
  58
  59 #include "gromacs/commandline/filenm.h"
  60 #include "gromacs/domdec/domdec.h"
  61 #include "gromacs/domdec/domdec_struct.h"
  62 #include "gromacs/domdec/localatomsetmanager.h"
  63 #include "gromacs/domdec/partition.h"
  64 #include "gromacs/ewald/ewald_utils.h"
  65 #include "gromacs/ewald/pme.h"
  66 #include "gromacs/ewald/pme_gpu_program.h"
  67 #include "gromacs/fileio/checkpoint.h"
  68 #include "gromacs/fileio/gmxfio.h"
  69 #include "gromacs/fileio/oenv.h"
  70 #include "gromacs/fileio/tpxio.h"
  71 #include "gromacs/gmxlib/network.h"
  72 #include "gromacs/gmxlib/nrnb.h"
  73 #include "gromacs/gpu_utils/clfftinitializer.h"
  74 #include "gromacs/gpu_utils/gpu_utils.h"
  75 #include "gromacs/hardware/cpuinfo.h"
  76 #include "gromacs/hardware/detecthardware.h"
  77 #include "gromacs/hardware/printhardware.h"
  78 #include "gromacs/imd/imd.h"
  79 #include "gromacs/listed_forces/disre.h"
  80 #include "gromacs/listed_forces/gpubonded.h"
  81 #include "gromacs/listed_forces/orires.h"
  82 #include "gromacs/math/functions.h"
  83 #include "gromacs/math/utilities.h"
  84 #include "gromacs/math/vec.h"
  85 #include "gromacs/mdlib/boxdeformation.h"
  86 #include "gromacs/mdlib/broadcaststructs.h"
  87 #include "gromacs/mdlib/calc_verletbuf.h"
  88 #include "gromacs/mdlib/dispersioncorrection.h"
  89 #include "gromacs/mdlib/enerdata_utils.h"
  90 #include "gromacs/mdlib/force.h"
  91 #include "gromacs/mdlib/forcerec.h"
  92 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  93 #include "gromacs/mdlib/makeconstraints.h"
  94 #include "gromacs/mdlib/md_support.h"
  95 #include "gromacs/mdlib/mdatoms.h"
  96 #include "gromacs/mdlib/membed.h"
  97 #include "gromacs/mdlib/ppforceworkload.h"
  98 #include "gromacs/mdlib/qmmm.h"
  99 #include "gromacs/mdlib/sighandler.h"
 100 #include "gromacs/mdlib/stophandler.h"
 101 #include "gromacs/mdrun/mdmodules.h"
 102 #include "gromacs/mdrun/simulationcontext.h"
 103 #include "gromacs/mdrunutility/handlerestart.h"
 104 #include "gromacs/mdrunutility/logging.h"
 105 #include "gromacs/mdrunutility/multisim.h"
 106 #include "gromacs/mdrunutility/printtime.h"
 107 #include "gromacs/mdrunutility/threadaffinity.h"
 108 #include "gromacs/mdtypes/commrec.h"
 109 #include "gromacs/mdtypes/enerdata.h"
 110 #include "gromacs/mdtypes/fcdata.h"
 111 #include "gromacs/mdtypes/inputrec.h"
 112 #include "gromacs/mdtypes/md_enums.h"
 113 #include "gromacs/mdtypes/mdrunoptions.h"
 114 #include "gromacs/mdtypes/observableshistory.h"
 115 #include "gromacs/mdtypes/state.h"
 116 #include "gromacs/nbnxm/gpu_data_mgmt.h"
 117 #include "gromacs/nbnxm/nbnxm.h"
 118 #include "gromacs/nbnxm/pairlist_tuning.h"
 119 #include "gromacs/pbcutil/pbc.h"
 120 #include "gromacs/pulling/output.h"
 121 #include "gromacs/pulling/pull.h"
 122 #include "gromacs/pulling/pull_rotation.h"
 123 #include "gromacs/restraint/manager.h"
 124 #include "gromacs/restraint/restraintmdmodule.h"
 125 #include "gromacs/restraint/restraintpotential.h"
 126 #include "gromacs/swap/swapcoords.h"
 127 #include "gromacs/taskassignment/decidegpuusage.h"
 128 #include "gromacs/taskassignment/resourcedivision.h"
 129 #include "gromacs/taskassignment/taskassignment.h"
 130 #include "gromacs/taskassignment/usergpuids.h"
 131 #include "gromacs/timing/gpu_timing.h"
 132 #include "gromacs/timing/wallcycle.h"
 133 #include "gromacs/timing/wallcyclereporting.h"
 134 #include "gromacs/topology/mtop_util.h"
 135 #include "gromacs/trajectory/trajectoryframe.h"
 136 #include "gromacs/utility/basenetwork.h"
 137 #include "gromacs/utility/cstringutil.h"
 138 #include "gromacs/utility/exceptions.h"
 139 #include "gromacs/utility/fatalerror.h"
 140 #include "gromacs/utility/filestream.h"
 141 #include "gromacs/utility/gmxassert.h"
 142 #include "gromacs/utility/gmxmpi.h"
 143 #include "gromacs/utility/logger.h"
 144 #include "gromacs/utility/loggerbuilder.h"
 145 #include "gromacs/utility/physicalnodecommunicator.h"
 146 #include "gromacs/utility/pleasecite.h"
 147 #include "gromacs/utility/programcontext.h"
 148 #include "gromacs/utility/smalloc.h"
 149 #include "gromacs/utility/stringutil.h"
 150
 151 #include "replicaexchange.h"
 152 #include "simulator.h"
 153
 154 #if GMX_FAHCORE
 155 #include "corewrap.h"
 156 #endif
 157
 158 namespace gmx
 159 {
 160
 161 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
 162  *
 163  * Used to ensure that the master thread does not modify mdrunner during copy
 164  * on the spawned threads. */
 165 static void threadMpiMdrunnerAccessBarrier()
 166 {
 167 #if GMX_THREAD_MPI
 168     MPI_Barrier(MPI_COMM_WORLD);
 169 #endif
 170 }
 171
 172 Mdrunner Mdrunner::cloneOnSpawnedThread() const
 173 {
 174     auto newRunner = Mdrunner(std::make_unique<MDModules>());
 175
 176     // All runners in the same process share a restraint manager resource because it is
 177     // part of the interface to the client code, which is associated only with the
 178     // original thread. Handles to the same resources can be obtained by copy.
 179     {
 180         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
 181     }
 182
 183     // Copy original cr pointer before master thread can pass the thread barrier
 184     newRunner.cr  = reinitialize_commrec_for_this_thread(cr);
 185
 186     // Copy members of master runner.
 187     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 188     // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
 189     newRunner.hw_opt    = hw_opt;
 190     newRunner.filenames = filenames;
 191
 192     newRunner.oenv                = oenv;
 193     newRunner.mdrunOptions        = mdrunOptions;
 194     newRunner.domdecOptions       = domdecOptions;
 195     newRunner.nbpu_opt            = nbpu_opt;
 196     newRunner.pme_opt             = pme_opt;
 197     newRunner.pme_fft_opt         = pme_fft_opt;
 198     newRunner.bonded_opt          = bonded_opt;
 199     newRunner.nstlist_cmdline     = nstlist_cmdline;
 200     newRunner.replExParams        = replExParams;
 201     newRunner.pforce              = pforce;
 202     newRunner.ms                  = ms;
 203     newRunner.startingBehavior    = startingBehavior;
 204     newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 205
 206     threadMpiMdrunnerAccessBarrier();
 207
 208     GMX_RELEASE_ASSERT(!MASTER(newRunner.cr), "cloneOnSpawnedThread should only be called on spawned threads");
 209
 210     return newRunner;
 211 }
 212
 213 /*! \brief The callback used for running on spawned threads.
 214  *
 215  * Obtains the pointer to the master mdrunner object from the one
 216  * argument permitted to the thread-launch API call, copies it to make
 217  * a new runner for this thread, reinitializes necessary data, and
 218  * proceeds to the simulation. */
 219 static void mdrunner_start_fn(const void *arg)
 220 {
 221     try
 222     {
 223         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
 224         /* copy the arg list to make sure that it's thread-local. This
 225            doesn't copy pointed-to items, of course; fnm, cr and fplog
 226            are reset in the call below, all others should be const. */
 227         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
 228         mdrunner.mdrunner();
 229     }
 230     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 231 }
 232
 233
 234 /*! \brief Start thread-MPI threads.
 235  *
 236  * Called by mdrunner() to start a specific number of threads
 237  * (including the main thread) for thread-parallel runs. This in turn
 238  * calls mdrunner() for each thread. All options are the same as for
 239  * mdrunner(). */
 240 t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
 241 {
 242
 243     /* first check whether we even need to start tMPI */
 244     if (numThreadsToLaunch < 2)
 245     {
 246         return cr;
 247     }
 248
 249 #if GMX_THREAD_MPI
 250     /* now spawn new threads that start mdrunner_start_fn(), while
 251        the main thread returns. Thread affinity is handled later. */
 252     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
 253                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
 254     {
 255         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
 256     }
 257
 258     threadMpiMdrunnerAccessBarrier();
 259 #else
 260     GMX_UNUSED_VALUE(mdrunner_start_fn);
 261 #endif
 262
 263     return reinitialize_commrec_for_this_thread(cr);
 264 }
 265
 266 }  // namespace gmx
 267
 268 /*! \brief Initialize variables for Verlet scheme simulation */
 269 static void prepare_verlet_scheme(FILE                           *fplog,
 270                                   t_commrec                      *cr,
 271                                   t_inputrec                     *ir,
 272                                   int                             nstlist_cmdline,
 273                                   const gmx_mtop_t               *mtop,
 274                                   const matrix                    box,
 275                                   bool                            makeGpuPairList,
 276                                   const gmx::CpuInfo             &cpuinfo)
 277 {
 278     /* For NVE simulations, we will retain the initial list buffer */
 279     if (EI_DYNAMICS(ir->eI) &&
 280         ir->verletbuf_tol > 0 &&
 281         !(EI_MD(ir->eI) && ir->etc == etcNO))
 282     {
 283         /* Update the Verlet buffer size for the current run setup */
 284
 285         /* Here we assume SIMD-enabled kernels are being used. But as currently
 286          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 287          * and 4x2 gives a larger buffer than 4x4, this is ok.
 288          */
 289         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
 290         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
 291
 292         const real         rlist_new =
 293             calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
 294
 295         if (rlist_new != ir->rlist)
 296         {
 297             if (fplog != nullptr)
 298             {
 299                 fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 300                         ir->rlist, rlist_new,
 301                         listSetup.cluster_size_i, listSetup.cluster_size_j);
 302             }
 303             ir->rlist     = rlist_new;
 304         }
 305     }
 306
 307     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 308     {
 309         gmx_fatal(FARGS, "Can not set nstlist without %s",
 310                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 311     }
 312
 313     if (EI_DYNAMICS(ir->eI))
 314     {
 315         /* Set or try nstlist values */
 316         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
 317     }
 318 }
 319
 320 /*! \brief Override the nslist value in inputrec
 321  *
 322  * with value passed on the command line (if any)
 323  */
 324 static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
 325                                     int64_t              nsteps_cmdline,
 326                                     t_inputrec          *ir)
 327 {
 328     assert(ir);
 329
 330     /* override with anything else than the default -2 */
 331     if (nsteps_cmdline > -2)
 332     {
 333         char sbuf_steps[STEPSTRSIZE];
 334         char sbuf_msg[STRLEN];
 335
 336         ir->nsteps = nsteps_cmdline;
 337         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
 338         {
 339             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
 340                     gmx_step_str(nsteps_cmdline, sbuf_steps),
 341                     fabs(nsteps_cmdline*ir->delta_t));
 342         }
 343         else
 344         {
 345             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
 346                     gmx_step_str(nsteps_cmdline, sbuf_steps));
 347         }
 348
 349         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
 350     }
 351     else if (nsteps_cmdline < -2)
 352     {
 353         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64,
 354                   nsteps_cmdline);
 355     }
 356     /* Do nothing if nsteps_cmdline == -2 */
 357 }
 358
 359 namespace gmx
 360 {
 361
 362 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
 363  *
 364  * If not, and if a warning may be issued, logs a warning about
 365  * falling back to CPU code. With thread-MPI, only the first
 366  * call to this function should have \c issueWarning true. */
 367 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
 368                                                const t_inputrec *ir,
 369                                                bool              issueWarning)
 370 {
 371     if (ir->opts.ngener - ir->nwall > 1)
 372     {
 373         /* The GPU code does not support more than one energy group.
 374          * If the user requested GPUs explicitly, a fatal error is given later.
 375          */
 376         if (issueWarning)
 377         {
 378             GMX_LOG(mdlog.warning).asParagraph()
 379                 .appendText("Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
 380                             "For better performance, run on the GPU without energy groups and then do "
 381                             "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.");
 382         }
 383         return false;
 384     }
 385     return true;
 386 }
 387
 388 //! Initializes the logger for mdrun.
 389 static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr)
 390 {
 391     gmx::LoggerBuilder builder;
 392     if (fplog != nullptr)
 393     {
 394         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
 395     }
 396     if (cr == nullptr || SIMMASTER(cr))
 397     {
 398         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning,
 399                                 &gmx::TextOutputFile::standardError());
 400     }
 401     return builder.build();
 402 }
 403
 404 //! Make a TaskTarget from an mdrun argument string.
 405 static TaskTarget findTaskTarget(const char *optionString)
 406 {
 407     TaskTarget returnValue = TaskTarget::Auto;
 408
 409     if (strncmp(optionString, "auto", 3) == 0)
 410     {
 411         returnValue = TaskTarget::Auto;
 412     }
 413     else if (strncmp(optionString, "cpu", 3) == 0)
 414     {
 415         returnValue = TaskTarget::Cpu;
 416     }
 417     else if (strncmp(optionString, "gpu", 3) == 0)
 418     {
 419         returnValue = TaskTarget::Gpu;
 420     }
 421     else
 422     {
 423         GMX_ASSERT(false, "Option string should have been checked for sanity already");
 424     }
 425
 426     return returnValue;
 427 }
 428
 429 //! Finish run, aggregate data to print performance info.
 430 static void finish_run(FILE *fplog,
 431                        const gmx::MDLogger &mdlog,
 432                        const t_commrec *cr,
 433                        const t_inputrec *inputrec,
 434                        t_nrnb nrnb[], gmx_wallcycle_t wcycle,
 435                        gmx_walltime_accounting_t walltime_accounting,
 436                        nonbonded_verlet_t *nbv,
 437                        const gmx_pme_t *pme,
 438                        gmx_bool bWriteStat)
 439 {
 440     t_nrnb *nrnb_tot = nullptr;
 441     double  delta_t  = 0;
 442     double  nbfs     = 0, mflop = 0;
 443     double  elapsed_time,
 444             elapsed_time_over_all_ranks,
 445             elapsed_time_over_all_threads,
 446             elapsed_time_over_all_threads_over_all_ranks;
 447     /* Control whether it is valid to print a report. Only the
 448        simulation master may print, but it should not do so if the run
 449        terminated e.g. before a scheduled reset step. This is
 450        complicated by the fact that PME ranks are unaware of the
 451        reason why they were sent a pmerecvqxFINISH. To avoid
 452        communication deadlocks, we always do the communication for the
 453        report, even if we've decided not to write the report, because
 454        how long it takes to finish the run is not important when we've
 455        decided not to report on the simulation performance.
 456
 457        Further, we only report performance for dynamical integrators,
 458        because those are the only ones for which we plan to
 459        consider doing any optimizations. */
 460     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
 461
 462     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
 463     {
 464         GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
 465         printReport = false;
 466     }
 467
 468     if (cr->nnodes > 1)
 469     {
 470         snew(nrnb_tot, 1);
 471 #if GMX_MPI
 472         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
 473                       cr->mpi_comm_mysim);
 474 #endif
 475     }
 476     else
 477     {
 478         nrnb_tot = nrnb;
 479     }
 480
 481     elapsed_time                  = walltime_accounting_get_time_since_reset(walltime_accounting);
 482     elapsed_time_over_all_threads = walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
 483     if (cr->nnodes > 1)
 484     {
 485 #if GMX_MPI
 486         /* reduce elapsed_time over all MPI ranks in the current simulation */
 487         MPI_Allreduce(&elapsed_time,
 488                       &elapsed_time_over_all_ranks,
 489                       1, MPI_DOUBLE, MPI_SUM,
 490                       cr->mpi_comm_mysim);
 491         elapsed_time_over_all_ranks /= cr->nnodes;
 492         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
 493          * current simulation. */
 494         MPI_Allreduce(&elapsed_time_over_all_threads,
 495                       &elapsed_time_over_all_threads_over_all_ranks,
 496                       1, MPI_DOUBLE, MPI_SUM,
 497                       cr->mpi_comm_mysim);
 498 #endif
 499     }
 500     else
 501     {
 502         elapsed_time_over_all_ranks                  = elapsed_time;
 503         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
 504     }
 505
 506     if (printReport)
 507     {
 508         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
 509     }
 510     if (cr->nnodes > 1)
 511     {
 512         sfree(nrnb_tot);
 513     }
 514
 515     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
 516     {
 517         print_dd_statistics(cr, inputrec, fplog);
 518     }
 519
 520     /* TODO Move the responsibility for any scaling by thread counts
 521      * to the code that handled the thread region, so that there's a
 522      * mechanism to keep cycle counting working during the transition
 523      * to task parallelism. */
 524     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 525     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
 526     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
 527     auto cycle_sum(wallcycle_sum(cr, wcycle));
 528
 529     if (printReport)
 530     {
 531         auto                    nbnxn_gpu_timings = (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
 532         gmx_wallclock_gpu_pme_t pme_gpu_timings   = {};
 533
 534         if (pme_gpu_task_enabled(pme))
 535         {
 536             pme_gpu_get_timings(pme, &pme_gpu_timings);
 537         }
 538         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
 539                         elapsed_time_over_all_ranks,
 540                         wcycle, cycle_sum,
 541                         nbnxn_gpu_timings,
 542                         &pme_gpu_timings);
 543
 544         if (EI_DYNAMICS(inputrec->eI))
 545         {
 546             delta_t = inputrec->delta_t;
 547         }
 548
 549         if (fplog)
 550         {
 551             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
 552                        elapsed_time_over_all_ranks,
 553                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 554                        delta_t, nbfs, mflop);
 555         }
 556         if (bWriteStat)
 557         {
 558             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
 559                        elapsed_time_over_all_ranks,
 560                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
 561                        delta_t, nbfs, mflop);
 562         }
 563     }
 564 }
 565
 566 int Mdrunner::mdrunner()
 567 {
 568     matrix                    box;
 569     t_nrnb                   *nrnb;
 570     t_forcerec               *fr               = nullptr;
 571     t_fcdata                 *fcd              = nullptr;
 572     real                      ewaldcoeff_q     = 0;
 573     real                      ewaldcoeff_lj    = 0;
 574     int                       nChargePerturbed = -1, nTypePerturbed = 0;
 575     gmx_wallcycle_t           wcycle;
 576     gmx_walltime_accounting_t walltime_accounting = nullptr;
 577     gmx_membed_t *            membed              = nullptr;
 578     gmx_hw_info_t            *hwinfo              = nullptr;
 579
 580     /* CAUTION: threads may be started later on in this function, so
 581        cr doesn't reflect the final parallel state right now */
 582     t_inputrec                      inputrecInstance;
 583     t_inputrec                     *inputrec = &inputrecInstance;
 584     gmx_mtop_t                      mtop;
 585
 586     bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data());
 587     bool doRerun  = mdrunOptions.rerun;
 588
 589     // Handle task-assignment related user options.
 590     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
 591                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
 592
 593     std::vector<int> userGpuTaskAssignment;
 594     try
 595     {
 596         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
 597     }
 598     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 599     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
 600     auto       pmeTarget       = findTaskTarget(pme_opt);
 601     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
 602     auto       bondedTarget    = findTaskTarget(bonded_opt);
 603     PmeRunMode pmeRunMode      = PmeRunMode::None;
 604
 605     // Here we assume that SIMMASTER(cr) does not change even after the
 606     // threads are started.
 607
 608     FILE *fplog = nullptr;
 609     // If we are appending, we don't write log output because we need
 610     // to check that the old log file matches what the checkpoint file
 611     // expects. Otherwise, we should start to write log output now if
 612     // there is a file ready for it.
 613     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
 614     {
 615         fplog = gmx_fio_getfp(logFileHandle);
 616     }
 617     gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
 618     gmx::MDLogger    mdlog(logOwner.logger());
 619
 620     // With thread-MPI, the communicator changes after threads are
 621     // launched, so this is rebuilt for the master rank at that
 622     // time. The non-master ranks are fine to keep the one made here.
 623     PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 624     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
 625
 626     gmx_print_detected_hardware(fplog, isMasterSimMasterRank(ms, MASTER(cr)), mdlog, hwinfo);
 627
 628     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
 629
 630     // Print citation requests after all software/hardware printing
 631     pleaseCiteGromacs(fplog);
 632
 633     std::unique_ptr<t_state> globalState;
 634
 635     if (SIMMASTER(cr))
 636     {
 637         /* Only the master rank has the global state */
 638         globalState = std::make_unique<t_state>();
 639
 640         /* Read (nearly) all data required for the simulation */
 641         read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), inputrec, globalState.get(), &mtop);
 642     }
 643
 644     /* Check and update the hardware options for internal consistency */
 645     checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks);
 646
 647     if (GMX_THREAD_MPI && SIMMASTER(cr))
 648     {
 649         bool useGpuForNonbonded = false;
 650         bool useGpuForPme       = false;
 651         try
 652         {
 653             // If the user specified the number of ranks, then we must
 654             // respect that, but in default mode, we need to allow for
 655             // the number of GPUs to choose the number of ranks.
 656             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 657             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
 658                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 659                     canUseGpuForNonbonded,
 660                     gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI),
 661                     hw_opt.nthreads_tmpi);
 662             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
 663                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
 664                     *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 665
 666         }
 667         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 668
 669         /* Determine how many thread-MPI ranks to start.
 670          *
 671          * TODO Over-writing the user-supplied value here does
 672          * prevent any possible subsequent checks from working
 673          * correctly. */
 674         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
 675                                                 &hw_opt,
 676                                                 gpuIdsToUse,
 677                                                 useGpuForNonbonded,
 678                                                 useGpuForPme,
 679                                                 inputrec, &mtop,
 680                                                 mdlog,
 681                                                 doMembed);
 682
 683         // Now start the threads for thread MPI.
 684         cr = spawnThreads(hw_opt.nthreads_tmpi);
 685         /* The main thread continues here with a new cr. We don't deallocate
 686            the old cr because other threads may still be reading it. */
 687         // TODO Both master and spawned threads call dup_tfn and
 688         // reinitialize_commrec_for_this_thread. Find a way to express
 689         // this better.
 690         physicalNodeComm = PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
 691     }
 692     // END OF CAUTION: cr and physicalNodeComm are now reliable
 693
 694     if (PAR(cr))
 695     {
 696         /* now broadcast everything to the non-master nodes/threads: */
 697         init_parallel(cr, inputrec, &mtop);
 698     }
 699
 700     // Now each rank knows the inputrec that SIMMASTER read and used,
 701     // and (if applicable) cr->nnodes has been assigned the number of
 702     // thread-MPI ranks that have been chosen. The ranks can now all
 703     // run the task-deciding functions and will agree on the result
 704     // without needing to communicate.
 705     //
 706     // TODO Should we do the communication in debug mode to support
 707     // having an assertion?
 708     //
 709     // Note that these variables describe only their own node.
 710     //
 711     // Note that when bonded interactions run on a GPU they always run
 712     // alongside a nonbonded task, so do not influence task assignment
 713     // even though they affect the force calculation workload.
 714     bool useGpuForNonbonded = false;
 715     bool useGpuForPme       = false;
 716     bool useGpuForBonded    = false;
 717     try
 718     {
 719         // It's possible that there are different numbers of GPUs on
 720         // different nodes, which is the user's responsibilty to
 721         // handle. If unsuitable, we will notice that during task
 722         // assignment.
 723         bool gpusWereDetected      = hwinfo->ngpu_compatible_tot > 0;
 724         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
 725         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
 726                                                                 emulateGpuNonbonded,
 727                                                                 canUseGpuForNonbonded,
 728                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI),
 729                                                                 gpusWereDetected);
 730         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
 731                                                     *hwinfo, *inputrec, mtop,
 732                                                     cr->nnodes, domdecOptions.numPmeRanks,
 733                                                     gpusWereDetected);
 734         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
 735         useGpuForBonded =
 736             decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
 737                                             bondedTarget, canUseGpuForBonded,
 738                                             EVDW_PME(inputrec->vdwtype),
 739                                             EEL_PME_EWALD(inputrec->coulombtype),
 740                                             domdecOptions.numPmeRanks, gpusWereDetected);
 741
 742         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
 743         if (pmeRunMode == PmeRunMode::GPU)
 744         {
 745             if (pmeFftTarget == TaskTarget::Cpu)
 746             {
 747                 pmeRunMode = PmeRunMode::Mixed;
 748             }
 749         }
 750         else if (pmeFftTarget == TaskTarget::Gpu)
 751         {
 752             gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft.");
 753         }
 754     }
 755     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 756
 757     // Build restraints.
 758     // TODO: hide restraint implementation details from Mdrunner.
 759     // There is nothing unique about restraints at this point as far as the
 760     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
 761     // factory functions from the SimulationContext on which to call mdModules_->add().
 762     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
 763     for (auto && restraint : restraintManager_->getRestraints())
 764     {
 765         auto module = RestraintMDModule::create(restraint,
 766                                                 restraint->sites());
 767         mdModules_->add(std::move(module));
 768     }
 769
 770     // TODO: Error handling
 771     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 772
 773     if (fplog != nullptr)
 774     {
 775         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 776         fprintf(fplog, "\n");
 777     }
 778
 779     if (SIMMASTER(cr))
 780     {
 781         /* In rerun, set velocities to zero if present */
 782         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
 783         {
 784             // rerun does not use velocities
 785             GMX_LOG(mdlog.info).asParagraph().appendText(
 786                     "Rerun trajectory contains velocities. Rerun does only evaluate "
 787                     "potential energy and forces. The velocities will be ignored.");
 788             for (int i = 0; i < globalState->natoms; i++)
 789             {
 790                 clear_rvec(globalState->v[i]);
 791             }
 792             globalState->flags &= ~(1 << estV);
 793         }
 794
 795         /* now make sure the state is initialized and propagated */
 796         set_state_entries(globalState.get(), inputrec);
 797     }
 798
 799     /* NM and TPI parallelize over force/energy calculations, not atoms,
 800      * so we need to initialize and broadcast the global state.
 801      */
 802     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
 803     {
 804         if (!MASTER(cr))
 805         {
 806             globalState = std::make_unique<t_state>();
 807         }
 808         broadcastStateWithoutDynamics(cr, globalState.get());
 809     }
 810
 811     /* A parallel command line option consistency check that we can
 812        only do after any threads have started. */
 813     if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 ||
 814                      domdecOptions.numCells[YY] > 1 ||
 815                      domdecOptions.numCells[ZZ] > 1 ||
 816                      domdecOptions.numPmeRanks > 0))
 817     {
 818         gmx_fatal(FARGS,
 819                   "The -dd or -npme option request a parallel simulation, "
 820 #if !GMX_MPI
 821                   "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
 822 #else
 823 #if GMX_THREAD_MPI
 824                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
 825 #else
 826                   "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
 827 #endif
 828 #endif
 829     }
 830
 831     if (doRerun &&
 832         (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 833     {
 834         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 835     }
 836
 837     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
 838     {
 839         if (domdecOptions.numPmeRanks > 0)
 840         {
 841             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
 842                                  "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
 843         }
 844
 845         domdecOptions.numPmeRanks = 0;
 846     }
 847
 848     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
 849     {
 850         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
 851          * improve performance with many threads per GPU, since our OpenMP
 852          * scaling is bad, but it's difficult to automate the setup.
 853          */
 854         domdecOptions.numPmeRanks = 0;
 855     }
 856     if (useGpuForPme)
 857     {
 858         if (domdecOptions.numPmeRanks < 0)
 859         {
 860             domdecOptions.numPmeRanks = 0;
 861             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
 862         }
 863         else
 864         {
 865             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
 866         }
 867     }
 868
 869 #if GMX_FAHCORE
 870     if (MASTER(cr))
 871     {
 872         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
 873     }
 874 #endif
 875
 876     /* NMR restraints must be initialized before load_checkpoint,
 877      * since with time averaging the history is added to t_state.
 878      * For proper consistency check we therefore need to extend
 879      * t_state here.
 880      * So the PME-only nodes (if present) will also initialize
 881      * the distance restraints.
 882      */
 883     snew(fcd, 1);
 884
 885     /* This needs to be called before read_checkpoint to extend the state */
 886     init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
 887
 888     init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
 889
 890     auto                 deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
 891
 892     ObservablesHistory   observablesHistory = {};
 893
 894     if (startingBehavior != StartingBehavior::NewSimulation)
 895     {
 896         load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
 897                         logFileHandle,
 898                         cr, domdecOptions.numCells,
 899                         inputrec, globalState.get(),
 900                         &observablesHistory,
 901                         mdrunOptions.reproducible);
 902
 903         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
 904         {
 905             // Now we can start normal logging to the truncated log file.
 906             fplog    = gmx_fio_getfp(logFileHandle);
 907             prepareLogAppending(fplog);
 908             logOwner = buildLogger(fplog, cr);
 909             mdlog    = logOwner.logger();
 910         }
 911     }
 912
 913     if (mdrunOptions.numStepsCommandline > -2)
 914     {
 915         GMX_LOG(mdlog.info).asParagraph().
 916             appendText("The -nsteps functionality is deprecated, and may be removed in a future version. "
 917                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field.");
 918     }
 919     /* override nsteps with value set on the commamdline */
 920     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
 921
 922     if (SIMMASTER(cr))
 923     {
 924         copy_mat(globalState->box, box);
 925     }
 926
 927     if (PAR(cr))
 928     {
 929         gmx_bcast(sizeof(box), box, cr);
 930     }
 931
 932     if (inputrec->cutoff_scheme != ecutsVERLET)
 933     {
 934         gmx_fatal(FARGS, "This group-scheme .tpr file can no longer be run by mdrun. Please update to the Verlet scheme, or use an earlier version of GROMACS if necessary.");
 935     }
 936     /* Update rlist and nstlist. */
 937     prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
 938                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
 939
 940     LocalAtomSetManager atomSets;
 941
 942     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
 943                      inputrec->eI == eiNM))
 944     {
 945         cr->dd = init_domain_decomposition(mdlog, cr, domdecOptions, mdrunOptions,
 946                                            &mtop, inputrec,
 947                                            box, positionsFromStatePointer(globalState.get()),
 948                                            &atomSets);
 949         // Note that local state still does not exist yet.
 950     }
 951     else
 952     {
 953         /* PME, if used, is done on all nodes with 1D decomposition */
 954         cr->npmenodes = 0;
 955         cr->duty      = (DUTY_PP | DUTY_PME);
 956
 957         if (inputrec->ePBC == epbcSCREW)
 958         {
 959             gmx_fatal(FARGS,
 960                       "pbc=screw is only implemented with domain decomposition");
 961         }
 962     }
 963
 964     if (PAR(cr))
 965     {
 966         /* After possible communicator splitting in make_dd_communicators.
 967          * we can set up the intra/inter node communication.
 968          */
 969         gmx_setup_nodecomm(fplog, cr);
 970     }
 971
 972 #if GMX_MPI
 973     if (isMultiSim(ms))
 974     {
 975         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
 976                 "This is simulation %d out of %d running as a composite GROMACS\n"
 977                 "multi-simulation job. Setup for this simulation:\n",
 978                 ms->sim, ms->nsim);
 979     }
 980     GMX_LOG(mdlog.warning).appendTextFormatted(
 981             "Using %d MPI %s\n",
 982             cr->nnodes,
 983 #if GMX_THREAD_MPI
 984             cr->nnodes == 1 ? "thread" : "threads"
 985 #else
 986             cr->nnodes == 1 ? "process" : "processes"
 987 #endif
 988             );
 989     fflush(stderr);
 990 #endif
 991
 992     // If mdrun -pin auto honors any affinity setting that already
 993     // exists. If so, it is nice to provide feedback about whether
 994     // that existing affinity setting was from OpenMP or something
 995     // else, so we run this code both before and after we initialize
 996     // the OpenMP support.
 997     gmx_check_thread_affinity_set(mdlog, cr,
 998                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
 999     /* Check and update the number of OpenMP threads requested */
1000     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1001                                             pmeRunMode, mtop);
1002
1003     gmx_omp_nthreads_init(mdlog, cr,
1004                           hwinfo->nthreads_hw_avail,
1005                           physicalNodeComm.size_,
1006                           hw_opt.nthreads_omp,
1007                           hw_opt.nthreads_omp_pme,
1008                           !thisRankHasDuty(cr, DUTY_PP));
1009
1010     // Enable FP exception detection, but not in
1011     // Release mode and not for compilers with known buggy FP
1012     // exception support (clang with any optimization) or suspected
1013     // buggy FP exception support (gcc 7.* with optimization).
1014 #if !defined NDEBUG && \
1015     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1016     && defined __OPTIMIZE__)
1017     const bool bEnableFPE = true;
1018 #else
1019     const bool bEnableFPE = false;
1020 #endif
1021     //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1022     if (bEnableFPE)
1023     {
1024         gmx_feenableexcept();
1025     }
1026
1027     // Build a data structure that expresses which kinds of non-bonded
1028     // task are handled by this rank.
1029     //
1030     // TODO Later, this might become a loop over all registered modules
1031     // relevant to the mdp inputs, to find those that have such tasks.
1032     //
1033     // TODO This could move before init_domain_decomposition() as part
1034     // of refactoring that separates the responsibility for duty
1035     // assignment from setup for communication between tasks, and
1036     // setup for tasks handled with a domain (ie including short-ranged
1037     // tasks, bonded tasks, etc.).
1038     //
1039     // Note that in general useGpuForNonbonded, etc. can have a value
1040     // that is inconsistent with the presence of actual GPUs on any
1041     // rank, and that is not known to be a problem until the
1042     // duty of the ranks on a node become known.
1043     //
1044     // TODO Later we might need the concept of computeTasksOnThisRank,
1045     // from which we construct gpuTasksOnThisRank.
1046     //
1047     // Currently the DD code assigns duty to ranks that can
1048     // include PP work that currently can be executed on a single
1049     // GPU, if present and compatible.  This has to be coordinated
1050     // across PP ranks on a node, with possible multiple devices
1051     // or sharing devices on a node, either from the user
1052     // selection, or automatically.
1053     auto                 haveGpus = !gpuIdsToUse.empty();
1054     std::vector<GpuTask> gpuTasksOnThisRank;
1055     if (thisRankHasDuty(cr, DUTY_PP))
1056     {
1057         if (useGpuForNonbonded)
1058         {
1059             // Note that any bonded tasks on a GPU always accompany a
1060             // non-bonded task.
1061             if (haveGpus)
1062             {
1063                 gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
1064             }
1065             else if (nonbondedTarget == TaskTarget::Gpu)
1066             {
1067                 gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected.");
1068             }
1069             else if (bondedTarget == TaskTarget::Gpu)
1070             {
1071                 gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because there is none detected.");
1072             }
1073         }
1074     }
1075     // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
1076     if (EEL_PME(inputrec->coulombtype) && (thisRankHasDuty(cr, DUTY_PME)))
1077     {
1078         if (useGpuForPme)
1079         {
1080             if (haveGpus)
1081             {
1082                 gpuTasksOnThisRank.push_back(GpuTask::Pme);
1083             }
1084             else if (pmeTarget == TaskTarget::Gpu)
1085             {
1086                 gmx_fatal(FARGS, "Cannot run PME on a GPU because there is none detected.");
1087             }
1088         }
1089     }
1090
1091     GpuTaskAssignment gpuTaskAssignment;
1092     try
1093     {
1094         // Produce the task assignment for this rank.
1095         gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
1096                                               mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank,
1097                                               useGpuForBonded, pmeRunMode);
1098     }
1099     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1100
1101     /* Prevent other ranks from continuing after an issue was found
1102      * and reported as a fatal error.
1103      *
1104      * TODO This function implements a barrier so that MPI runtimes
1105      * can organize an orderly shutdown if one of the ranks has had to
1106      * issue a fatal error in various code already run. When we have
1107      * MPI-aware error handling and reporting, this should be
1108      * improved. */
1109 #if GMX_MPI
1110     if (PAR(cr))
1111     {
1112         MPI_Barrier(cr->mpi_comm_mysim);
1113     }
1114     if (isMultiSim(ms))
1115     {
1116         if (SIMMASTER(cr))
1117         {
1118             MPI_Barrier(ms->mpi_comm_masters);
1119         }
1120         /* We need another barrier to prevent non-master ranks from contiuing
1121          * when an error occured in a different simulation.
1122          */
1123         MPI_Barrier(cr->mpi_comm_mysim);
1124     }
1125 #endif
1126
1127     /* Now that we know the setup is consistent, check for efficiency */
1128     check_resource_division_efficiency(hwinfo, !gpuTaskAssignment.empty(), mdrunOptions.ntompOptionIsSet,
1129                                        cr, mdlog);
1130
1131     gmx_device_info_t *nonbondedDeviceInfo = nullptr;
1132
1133     if (thisRankHasDuty(cr, DUTY_PP))
1134     {
1135         // This works because only one task of each type is currently permitted.
1136         auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
1137                                              hasTaskType<GpuTask::Nonbonded>);
1138         if (nbGpuTaskMapping != gpuTaskAssignment.end())
1139         {
1140             int nonbondedDeviceId = nbGpuTaskMapping->deviceId_;
1141             nonbondedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, nonbondedDeviceId);
1142             init_gpu(nonbondedDeviceInfo);
1143
1144             if (DOMAINDECOMP(cr))
1145             {
1146                 /* When we share GPUs over ranks, we need to know this for the DLB */
1147                 dd_setup_dlb_resource_sharing(cr, nonbondedDeviceId);
1148             }
1149
1150         }
1151     }
1152
1153     std::unique_ptr<ClfftInitializer> initializedClfftLibrary;
1154
1155     gmx_device_info_t                *pmeDeviceInfo = nullptr;
1156     // Later, this program could contain kernels that might be later
1157     // re-used as auto-tuning progresses, or subsequent simulations
1158     // are invoked.
1159     PmeGpuProgramStorage pmeGpuProgram;
1160     // This works because only one task of each type is currently permitted.
1161     auto                 pmeGpuTaskMapping     = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasTaskType<GpuTask::Pme>);
1162     const bool           thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
1163     if (thisRankHasPmeGpuTask)
1164     {
1165         pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_);
1166         init_gpu(pmeDeviceInfo);
1167         pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
1168         // TODO It would be nice to move this logic into the factory
1169         // function. See Redmine #2535.
1170         bool isMasterThread = !GMX_THREAD_MPI || MASTER(cr);
1171         if (pmeRunMode == PmeRunMode::GPU && !initializedClfftLibrary && isMasterThread)
1172         {
1173             initializedClfftLibrary = initializeClfftLibrary();
1174         }
1175     }
1176
1177     /* getting number of PP/PME threads on this MPI / tMPI rank.
1178        PME: env variable should be read only on one node to make sure it is
1179        identical everywhere;
1180      */
1181     const int numThreadsOnThisRank =
1182         thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) : gmx_omp_nthreads_get(emntPME);
1183     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
1184                                   *hwinfo->hardwareTopology,
1185                                   physicalNodeComm, mdlog);
1186
1187     if (hw_opt.thread_affinity != threadaffOFF)
1188     {
1189         /* Before setting affinity, check whether the affinity has changed
1190          * - which indicates that probably the OpenMP library has changed it
1191          * since we first checked).
1192          */
1193         gmx_check_thread_affinity_set(mdlog, cr,
1194                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1195
1196         int numThreadsOnThisNode, intraNodeThreadOffset;
1197         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1198                                  &intraNodeThreadOffset);
1199
1200         /* Set the CPU affinity */
1201         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
1202                                 numThreadsOnThisRank, numThreadsOnThisNode,
1203                                 intraNodeThreadOffset, nullptr);
1204     }
1205
1206     if (mdrunOptions.timingOptions.resetStep > -1)
1207     {
1208         GMX_LOG(mdlog.info).asParagraph().
1209             appendText("The -resetstep functionality is deprecated, and may be removed in a future version.");
1210     }
1211     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1212
1213     if (PAR(cr))
1214     {
1215         /* Master synchronizes its value of reset_counters with all nodes
1216          * including PME only nodes */
1217         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1218         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
1219         wcycle_set_reset_counters(wcycle, reset_counters);
1220     }
1221
1222     // Membrane embedding must be initialized before we call init_forcerec()
1223     if (doMembed)
1224     {
1225         if (MASTER(cr))
1226         {
1227             fprintf(stderr, "Initializing membed");
1228         }
1229         /* Note that membed cannot work in parallel because mtop is
1230          * changed here. Fix this if we ever want to make it run with
1231          * multiple ranks. */
1232         membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr,
1233                              &mdrunOptions
1234                                  .checkpointOptions.period);
1235     }
1236
1237     std::unique_ptr<MDAtoms>     mdAtoms;
1238     std::unique_ptr<gmx_vsite_t> vsite;
1239
1240     snew(nrnb, 1);
1241     if (thisRankHasDuty(cr, DUTY_PP))
1242     {
1243         /* Initiate forcerecord */
1244         fr                 = new t_forcerec;
1245         fr->forceProviders = mdModules_->initForceProviders();
1246         init_forcerec(fplog, mdlog, fr, fcd,
1247                       inputrec, &mtop, cr, box,
1248                       opt2fn("-table", filenames.size(), filenames.data()),
1249                       opt2fn("-tablep", filenames.size(), filenames.data()),
1250                       opt2fns("-tableb", filenames.size(), filenames.data()),
1251                       *hwinfo, nonbondedDeviceInfo,
1252                       useGpuForBonded,
1253                       FALSE,
1254                       pforce);
1255
1256         /* Initialize the mdAtoms structure.
1257          * mdAtoms is not filled with atom data,
1258          * as this can not be done now with domain decomposition.
1259          */
1260         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1261         if (globalState && thisRankHasPmeGpuTask)
1262         {
1263             // The pinning of coordinates in the global state object works, because we only use
1264             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1265             // points to the global state object without DD.
1266             // FIXME: MD and EM separately set up the local state - this should happen in the same function,
1267             // which should also perform the pinning.
1268             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1269         }
1270
1271         /* Initialize the virtual site communication */
1272         vsite = initVsite(mtop, cr);
1273
1274         calc_shifts(box, fr->shift_vec);
1275
1276         /* With periodic molecules the charge groups should be whole at start up
1277          * and the virtual sites should not be far from their proper positions.
1278          */
1279         if (!inputrec->bContinuation && MASTER(cr) &&
1280             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
1281         {
1282             /* Make molecules whole at start of run */
1283             if (fr->ePBC != epbcNONE)
1284             {
1285                 do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
1286             }
1287             if (vsite)
1288             {
1289                 /* Correct initial vsite positions are required
1290                  * for the initial distribution in the domain decomposition
1291                  * and for the initial shell prediction.
1292                  */
1293                 constructVsitesGlobal(mtop, globalState->x);
1294             }
1295         }
1296
1297         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1298         {
1299             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1300             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1301         }
1302     }
1303     else
1304     {
1305         /* This is a PME only node */
1306
1307         GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized");
1308
1309         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1310         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1311     }
1312
1313     gmx_pme_t *sepPmeData = nullptr;
1314     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1315     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
1316     gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
1317
1318     /* Initiate PME if necessary,
1319      * either on all nodes or on dedicated PME nodes only. */
1320     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1321     {
1322         if (mdAtoms && mdAtoms->mdatoms())
1323         {
1324             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1325             if (EVDW_PME(inputrec->vdwtype))
1326             {
1327                 nTypePerturbed   = mdAtoms->mdatoms()->nTypePerturbed;
1328             }
1329         }
1330         if (cr->npmenodes > 0)
1331         {
1332             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1333             gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
1334             gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
1335         }
1336
1337         if (thisRankHasDuty(cr, DUTY_PME))
1338         {
1339             try
1340             {
1341                 pmedata = gmx_pme_init(cr,
1342                                        getNumPmeDomains(cr->dd),
1343                                        inputrec,
1344                                        nChargePerturbed != 0, nTypePerturbed != 0,
1345                                        mdrunOptions.reproducible,
1346                                        ewaldcoeff_q, ewaldcoeff_lj,
1347                                        gmx_omp_nthreads_get(emntPME),
1348                                        pmeRunMode, nullptr,
1349                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
1350             }
1351             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1352         }
1353     }
1354
1355
1356     if (EI_DYNAMICS(inputrec->eI))
1357     {
1358         /* Turn on signal handling on all nodes */
1359         /*
1360          * (A user signal from the PME nodes (if any)
1361          * is communicated to the PP nodes.
1362          */
1363         signal_handler_install();
1364     }
1365
1366     pull_t *pull_work = nullptr;
1367     if (thisRankHasDuty(cr, DUTY_PP))
1368     {
1369         /* Assumes uniform use of the number of OpenMP threads */
1370         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1371
1372         if (inputrec->bPull)
1373         {
1374             /* Initialize pull code */
1375             pull_work =
1376                 init_pull(fplog, inputrec->pull, inputrec,
1377                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
1378             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1379             {
1380                 initPullHistory(pull_work, &observablesHistory);
1381             }
1382             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1383             {
1384                 init_pull_output_files(pull_work,
1385                                        filenames.size(), filenames.data(), oenv,
1386                                        startingBehavior);
1387             }
1388         }
1389
1390         std::unique_ptr<EnforcedRotation> enforcedRotation;
1391         if (inputrec->bRot)
1392         {
1393             /* Initialize enforced rotation code */
1394             enforcedRotation = init_rot(fplog,
1395                                         inputrec,
1396                                         filenames.size(),
1397                                         filenames.data(),
1398                                         cr,
1399                                         &atomSets,
1400                                         globalState.get(),
1401                                         &mtop,
1402                                         oenv,
1403                                         mdrunOptions,
1404                                         startingBehavior);
1405         }
1406
1407         t_swap *swap = nullptr;
1408         if (inputrec->eSwapCoords != eswapNO)
1409         {
1410             /* Initialize ion swapping code */
1411             swap = init_swapcoords(fplog, inputrec,
1412                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1413                                    &mtop, globalState.get(), &observablesHistory,
1414                                    cr, &atomSets, oenv, mdrunOptions,
1415                                    startingBehavior);
1416         }
1417
1418         /* Let makeConstraints know whether we have essential dynamics constraints.
1419          * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint
1420          */
1421         bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr
1422                                     || observablesHistory.edsamHistory);
1423         auto constr              = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics,
1424                                                    fplog, *mdAtoms->mdatoms(),
1425                                                    cr, ms, nrnb, wcycle, fr->bMolPBC);
1426
1427         /* Energy terms and groups */
1428         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), inputrec->fepvals->n_lambda);
1429
1430         /* Set up interactive MD (IMD) */
1431         auto imdSession = makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
1432                                          MASTER(cr) ? globalState->x.rvec_array() : nullptr,
1433                                          filenames.size(), filenames.data(), oenv, mdrunOptions.imdOptions,
1434                                          startingBehavior);
1435
1436         if (DOMAINDECOMP(cr))
1437         {
1438             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1439             /* This call is not included in init_domain_decomposition mainly
1440              * because fr->cginfo_mb is set later.
1441              */
1442             dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
1443                             domdecOptions.checkBondedInteractions,
1444                             fr->cginfo_mb);
1445         }
1446
1447         // TODO This is not the right place to manage the lifetime of
1448         // this data structure, but currently it's the easiest way to
1449         // make it work. Later, it should probably be made/updated
1450         // after the workload for the lifetime of a PP domain is
1451         // understood.
1452         PpForceWorkload ppForceWorkload;
1453
1454         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1455         /* Now do whatever the user wants us to do (how flexible...) */
1456         Simulator simulator {
1457             fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(),
1458             oenv,
1459             mdrunOptions,
1460             startingBehavior,
1461             vsite.get(), constr.get(),
1462             enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1463             deform.get(),
1464             mdModules_->outputProvider(),
1465             inputrec, imdSession.get(), pull_work, swap, &mtop,
1466             fcd,
1467             globalState.get(),
1468             &observablesHistory,
1469             mdAtoms.get(), nrnb, wcycle, fr,
1470             &enerd,
1471             &ppForceWorkload,
1472             replExParams,
1473             membed,
1474             walltime_accounting,
1475             std::move(stopHandlerBuilder_)
1476         };
1477         simulator.run(inputrec->eI, doRerun);
1478
1479         if (inputrec->bPull)
1480         {
1481             finish_pull(pull_work);
1482         }
1483         finish_swapcoords(swap);
1484     }
1485     else
1486     {
1487         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1488         /* do PME only */
1489         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1490         gmx_pmeonly(pmedata, cr, nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
1491     }
1492
1493     wallcycle_stop(wcycle, ewcRUN);
1494
1495     /* Finish up, write some stuff
1496      * if rerunMD, don't write last frame again
1497      */
1498     finish_run(fplog, mdlog, cr,
1499                inputrec, nrnb, wcycle, walltime_accounting,
1500                fr ? fr->nbv.get() : nullptr,
1501                pmedata,
1502                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1503
1504     // clean up cycle counter
1505     wallcycle_destroy(wcycle);
1506
1507 // Free PME data
1508     if (pmedata)
1509     {
1510         gmx_pme_destroy(pmedata);
1511         pmedata = nullptr;
1512     }
1513
1514     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1515     // before we destroy the GPU context(s) in free_gpu_resources().
1516     // Pinned buffers are associated with contexts in CUDA.
1517     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1518     mdAtoms.reset(nullptr);
1519     globalState.reset(nullptr);
1520     mdModules_.reset(nullptr);   // destruct force providers here as they might also use the GPU
1521
1522     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
1523     free_gpu_resources(fr, physicalNodeComm);
1524     free_gpu(nonbondedDeviceInfo);
1525     free_gpu(pmeDeviceInfo);
1526     done_forcerec(fr, mtop.molblock.size());
1527     sfree(fcd);
1528
1529     if (doMembed)
1530     {
1531         free_membed(membed);
1532     }
1533
1534     /* Does what it says */
1535     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1536     walltime_accounting_destroy(walltime_accounting);
1537     sfree(nrnb);
1538
1539     // Ensure log file content is written
1540     if (logFileHandle)
1541     {
1542         gmx_fio_flush(logFileHandle);
1543     }
1544
1545     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1546      * exceptions were enabled before function was called. */
1547     if (bEnableFPE)
1548     {
1549         gmx_fedisableexcept();
1550     }
1551
1552     auto rc = static_cast<int>(gmx_get_stop_condition());
1553
1554 #if GMX_THREAD_MPI
1555     /* we need to join all threads. The sub-threads join when they
1556        exit this function, but the master thread needs to be told to
1557        wait for that. */
1558     if (PAR(cr) && MASTER(cr))
1559     {
1560         tMPI_Finalize();
1561     }
1562     //TODO free commrec in MPI simulations
1563     done_commrec(cr);
1564 #endif
1565     return rc;
1566 }
1567
1568 Mdrunner::~Mdrunner()
1569 {
1570     // Clean up of the Manager.
1571     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1572     // but okay as long as threads synchronize some time before adding or accessing
1573     // a new set of restraints.
1574     if (restraintManager_)
1575     {
1576         restraintManager_->clear();
1577         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1578                    "restraints added during runner life time should be cleared at runner destruction.");
1579     }
1580 };
1581
1582 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller,
1583                             std::string                               name)
1584 {
1585     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1586     // Not sure if this should be logged through the md logger or something else,
1587     // but it is helpful to have some sort of INFO level message sent somewhere.
1588     //    std::cout << "Registering restraint named " << name << std::endl;
1589
1590     // When multiple restraints are used, it may be wasteful to register them separately.
1591     // Maybe instead register an entire Restraint Manager as a force provider.
1592     restraintManager_->addToSpec(std::move(puller),
1593                                  std::move(name));
1594 }
1595
1596 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules)
1597     : mdModules_(std::move(mdModules))
1598 {
1599 }
1600
1601 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
1602
1603 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
1604 Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
1605
1606 class Mdrunner::BuilderImplementation
1607 {
1608     public:
1609         BuilderImplementation() = delete;
1610         BuilderImplementation(std::unique_ptr<MDModules> mdModules,
1611                               SimulationContext        * context);
1612         ~BuilderImplementation();
1613
1614         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
1615                                                     real                forceWarningThreshold,
1616                                                     StartingBehavior    startingBehavior);
1617
1618         void addDomdec(const DomdecOptions &options);
1619
1620         void addVerletList(int nstlist);
1621
1622         void addReplicaExchange(const ReplicaExchangeParameters &params);
1623
1624         void addMultiSim(gmx_multisim_t* multisim);
1625
1626         void addNonBonded(const char* nbpu_opt);
1627
1628         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1629
1630         void addBondedTaskAssignment(const char* bonded_opt);
1631
1632         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
1633
1634         void addFilenames(ArrayRef <const t_filenm> filenames);
1635
1636         void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1637
1638         void addLogFile(t_fileio *logFileHandle);
1639
1640         void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1641
1642         Mdrunner build();
1643
1644     private:
1645
1646         // Default parameters copied from runner.h
1647         // \todo Clarify source(s) of default parameters.
1648
1649         const char* nbpu_opt_    = nullptr;
1650         const char* pme_opt_     = nullptr;
1651         const char* pme_fft_opt_ = nullptr;
1652         const char *bonded_opt_  = nullptr;
1653
1654         MdrunOptions                          mdrunOptions_;
1655
1656         DomdecOptions                         domdecOptions_;
1657
1658         ReplicaExchangeParameters             replicaExchangeParameters_;
1659
1660         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1661         int         nstlist_ = 0;
1662
1663         //! Non-owning multisim communicator handle.
1664         std::unique_ptr<gmx_multisim_t*>      multisim_ = nullptr;
1665
1666         //! Print a warning if any force is larger than this (in kJ/mol nm).
1667         real forceWarningThreshold_ = -1;
1668
1669         //! Whether the simulation will start afresh, or restart with/without appending.
1670         StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1671
1672         //! The modules that comprise the functionality of mdrun.
1673         std::unique_ptr<MDModules> mdModules_;
1674
1675         /*! \brief  Non-owning pointer to SimulationContext (owned and managed by client)
1676          *
1677          * \internal
1678          * \todo Establish robust protocol to make sure resources remain valid.
1679          * SimulationContext will likely be separated into multiple layers for
1680          * different levels of access and different phases of execution. Ref
1681          * https://redmine.gromacs.org/issues/2375
1682          * https://redmine.gromacs.org/issues/2587
1683          */
1684         SimulationContext* context_ = nullptr;
1685
1686         //! \brief Parallelism information.
1687         gmx_hw_opt_t hardwareOptions_;
1688
1689         //! filename options for simulation.
1690         ArrayRef<const t_filenm> filenames_;
1691
1692         /*! \brief Handle to output environment.
1693          *
1694          * \todo gmx_output_env_t needs lifetime management.
1695          */
1696         gmx_output_env_t*    outputEnvironment_ = nullptr;
1697
1698         /*! \brief Non-owning handle to MD log file.
1699          *
1700          * \todo Context should own output facilities for client.
1701          * \todo Improve log file handle management.
1702          * \internal
1703          * Code managing the FILE* relies on the ability to set it to
1704          * nullptr to check whether the filehandle is valid.
1705          */
1706         t_fileio* logFileHandle_ = nullptr;
1707
1708         /*!
1709          * \brief Builder for simulation stop signal handler.
1710          */
1711         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1712 };
1713
1714 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules,
1715                                                        SimulationContext        * context) :
1716     mdModules_(std::move(mdModules)),
1717     context_(context)
1718 {
1719     GMX_ASSERT(context_, "Bug found. It should not be possible to construct builder without a valid context.");
1720 }
1721
1722 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
1723
1724 Mdrunner::BuilderImplementation &
1725 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions    &options,
1726                                                       const real             forceWarningThreshold,
1727                                                       const StartingBehavior startingBehavior)
1728 {
1729     mdrunOptions_          = options;
1730     forceWarningThreshold_ = forceWarningThreshold;
1731     startingBehavior_      = startingBehavior;
1732     return *this;
1733 }
1734
1735 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options)
1736 {
1737     domdecOptions_ = options;
1738 }
1739
1740 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
1741 {
1742     nstlist_ = nstlist;
1743 }
1744
1745 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters &params)
1746 {
1747     replicaExchangeParameters_ = params;
1748 }
1749
1750 void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim)
1751 {
1752     multisim_ = std::make_unique<gmx_multisim_t*>(multisim);
1753 }
1754
1755 Mdrunner Mdrunner::BuilderImplementation::build()
1756 {
1757     auto newRunner = Mdrunner(std::move(mdModules_));
1758
1759     GMX_ASSERT(context_, "Bug found. It should not be possible to call build() without a valid context.");
1760
1761     newRunner.mdrunOptions          = mdrunOptions_;
1762     newRunner.pforce                = forceWarningThreshold_;
1763     newRunner.startingBehavior      = startingBehavior_;
1764     newRunner.domdecOptions         = domdecOptions_;
1765
1766     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
1767     newRunner.hw_opt          = hardwareOptions_;
1768
1769     // No invariant to check. This parameter exists to optionally override other behavior.
1770     newRunner.nstlist_cmdline = nstlist_;
1771
1772     newRunner.replExParams    = replicaExchangeParameters_;
1773
1774     newRunner.filenames = filenames_;
1775
1776     GMX_ASSERT(context_->communicationRecord_, "SimulationContext communications not initialized.");
1777     newRunner.cr = context_->communicationRecord_;
1778
1779     if (multisim_)
1780     {
1781         // nullptr is a valid value for the multisim handle, so we don't check the pointed-to pointer.
1782         newRunner.ms = *multisim_;
1783     }
1784     else
1785     {
1786         GMX_THROW(gmx::APIError("MdrunnerBuilder::addMultiSim() is required before build()"));
1787     }
1788
1789     // \todo Clarify ownership and lifetime management for gmx_output_env_t
1790     // \todo Update sanity checking when output environment has clearly specified invariants.
1791     // Initialization and default values for oenv are not well specified in the current version.
1792     if (outputEnvironment_)
1793     {
1794         newRunner.oenv  = outputEnvironment_;
1795     }
1796     else
1797     {
1798         GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()"));
1799     }
1800
1801     newRunner.logFileHandle = logFileHandle_;
1802
1803     if (nbpu_opt_)
1804     {
1805         newRunner.nbpu_opt    = nbpu_opt_;
1806     }
1807     else
1808     {
1809         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
1810     }
1811
1812     if (pme_opt_ && pme_fft_opt_)
1813     {
1814         newRunner.pme_opt     = pme_opt_;
1815         newRunner.pme_fft_opt = pme_fft_opt_;
1816     }
1817     else
1818     {
1819         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
1820     }
1821
1822     if (bonded_opt_)
1823     {
1824         newRunner.bonded_opt = bonded_opt_;
1825     }
1826     else
1827     {
1828         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
1829     }
1830
1831     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
1832
1833     if (stopHandlerBuilder_)
1834     {
1835         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
1836     }
1837     else
1838     {
1839         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
1840     }
1841
1842     return newRunner;
1843 }
1844
1845 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
1846 {
1847     nbpu_opt_ = nbpu_opt;
1848 }
1849
1850 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
1851                                              const char* pme_fft_opt)
1852 {
1853     pme_opt_     = pme_opt;
1854     pme_fft_opt_ = pme_fft_opt;
1855 }
1856
1857 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
1858 {
1859     bonded_opt_ = bonded_opt;
1860 }
1861
1862 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1863 {
1864     hardwareOptions_ = hardwareOptions;
1865 }
1866
1867 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
1868 {
1869     filenames_ = filenames;
1870 }
1871
1872 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1873 {
1874     outputEnvironment_ = outputEnvironment;
1875 }
1876
1877 void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle)
1878 {
1879     logFileHandle_ = logFileHandle;
1880 }
1881
1882 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
1883 {
1884     stopHandlerBuilder_ = std::move(builder);
1885 }
1886
1887 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
1888                                  compat::not_null<SimulationContext*> context) :
1889     impl_ {std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context)}
1890 {
1891 }
1892
1893 MdrunnerBuilder::~MdrunnerBuilder() = default;
1894
1895 MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions    &options,
1896                                                       real                   forceWarningThreshold,
1897                                                       const StartingBehavior startingBehavior)
1898 {
1899     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
1900     return *this;
1901 }
1902
1903 MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options)
1904 {
1905     impl_->addDomdec(options);
1906     return *this;
1907 }
1908
1909 MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist)
1910 {
1911     impl_->addVerletList(nstlist);
1912     return *this;
1913 }
1914
1915 MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters &params)
1916 {
1917     impl_->addReplicaExchange(params);
1918     return *this;
1919 }
1920
1921 MdrunnerBuilder &MdrunnerBuilder::addMultiSim(gmx_multisim_t* multisim)
1922 {
1923     impl_->addMultiSim(multisim);
1924     return *this;
1925 }
1926
1927 MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
1928 {
1929     impl_->addNonBonded(nbpu_opt);
1930     return *this;
1931 }
1932
1933 MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
1934                                                     const char* pme_fft_opt)
1935 {
1936     // The builder method may become more general in the future, but in this version,
1937     // parameters for PME electrostatics are both required and the only parameters
1938     // available.
1939     if (pme_opt && pme_fft_opt)
1940     {
1941         impl_->addPME(pme_opt, pme_fft_opt);
1942     }
1943     else
1944     {
1945         GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
1946     }
1947     return *this;
1948 }
1949
1950 MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
1951 {
1952     impl_->addBondedTaskAssignment(bonded_opt);
1953     return *this;
1954 }
1955
1956 Mdrunner MdrunnerBuilder::build()
1957 {
1958     return impl_->build();
1959 }
1960
1961 MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1962 {
1963     impl_->addHardwareOptions(hardwareOptions);
1964     return *this;
1965 }
1966
1967 MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
1968 {
1969     impl_->addFilenames(filenames);
1970     return *this;
1971 }
1972
1973 MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1974 {
1975     impl_->addOutputEnvironment(outputEnvironment);
1976     return *this;
1977 }
1978
1979 MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle)
1980 {
1981     impl_->addLogFile(logFileHandle);
1982     return *this;
1983 }
1984
1985 MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
1986 {
1987     impl_->addStopHandlerBuilder(std::move(builder));
1988     return *this;
1989 }
1990
1991 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default;
1992
1993 MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default;
1994
1995 } // namespace gmx