Change MPI setup to communicate TPR as buffer
[alexxy/gromacs.git] / src / gromacs / mdrun / runner.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the MD runner routine calling all integrators.
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include "runner.h"
47
48 #include "config.h"
49
50 #include <cassert>
51 #include <cinttypes>
52 #include <csignal>
53 #include <cstdlib>
54 #include <cstring>
55
56 #include <algorithm>
57 #include <memory>
58
59 #include "gromacs/commandline/filenm.h"
60 #include "gromacs/domdec/domdec.h"
61 #include "gromacs/domdec/domdec_struct.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/localatomsetmanager.h"
64 #include "gromacs/domdec/partition.h"
65 #include "gromacs/ewald/ewald_utils.h"
66 #include "gromacs/ewald/pme.h"
67 #include "gromacs/ewald/pme_gpu_program.h"
68 #include "gromacs/fileio/checkpoint.h"
69 #include "gromacs/fileio/gmxfio.h"
70 #include "gromacs/fileio/oenv.h"
71 #include "gromacs/fileio/tpxio.h"
72 #include "gromacs/gmxlib/network.h"
73 #include "gromacs/gmxlib/nrnb.h"
74 #include "gromacs/gpu_utils/gpu_utils.h"
75 #include "gromacs/hardware/cpuinfo.h"
76 #include "gromacs/hardware/detecthardware.h"
77 #include "gromacs/hardware/printhardware.h"
78 #include "gromacs/imd/imd.h"
79 #include "gromacs/listed_forces/disre.h"
80 #include "gromacs/listed_forces/gpubonded.h"
81 #include "gromacs/listed_forces/orires.h"
82 #include "gromacs/math/functions.h"
83 #include "gromacs/math/utilities.h"
84 #include "gromacs/math/vec.h"
85 #include "gromacs/mdlib/boxdeformation.h"
86 #include "gromacs/mdlib/broadcaststructs.h"
87 #include "gromacs/mdlib/calc_verletbuf.h"
88 #include "gromacs/mdlib/dispersioncorrection.h"
89 #include "gromacs/mdlib/enerdata_utils.h"
90 #include "gromacs/mdlib/force.h"
91 #include "gromacs/mdlib/forcerec.h"
92 #include "gromacs/mdlib/gmx_omp_nthreads.h"
93 #include "gromacs/mdlib/makeconstraints.h"
94 #include "gromacs/mdlib/md_support.h"
95 #include "gromacs/mdlib/mdatoms.h"
96 #include "gromacs/mdlib/membed.h"
97 #include "gromacs/mdlib/qmmm.h"
98 #include "gromacs/mdlib/sighandler.h"
99 #include "gromacs/mdlib/stophandler.h"
100 #include "gromacs/mdrun/mdmodules.h"
101 #include "gromacs/mdrun/simulationcontext.h"
102 #include "gromacs/mdrunutility/handlerestart.h"
103 #include "gromacs/mdrunutility/logging.h"
104 #include "gromacs/mdrunutility/multisim.h"
105 #include "gromacs/mdrunutility/printtime.h"
106 #include "gromacs/mdrunutility/threadaffinity.h"
107 #include "gromacs/mdtypes/commrec.h"
108 #include "gromacs/mdtypes/enerdata.h"
109 #include "gromacs/mdtypes/fcdata.h"
110 #include "gromacs/mdtypes/group.h"
111 #include "gromacs/mdtypes/inputrec.h"
112 #include "gromacs/mdtypes/md_enums.h"
113 #include "gromacs/mdtypes/mdrunoptions.h"
114 #include "gromacs/mdtypes/observableshistory.h"
115 #include "gromacs/mdtypes/simulation_workload.h"
116 #include "gromacs/mdtypes/state.h"
117 #include "gromacs/nbnxm/gpu_data_mgmt.h"
118 #include "gromacs/nbnxm/nbnxm.h"
119 #include "gromacs/nbnxm/pairlist_tuning.h"
120 #include "gromacs/pbcutil/pbc.h"
121 #include "gromacs/pulling/output.h"
122 #include "gromacs/pulling/pull.h"
123 #include "gromacs/pulling/pull_rotation.h"
124 #include "gromacs/restraint/manager.h"
125 #include "gromacs/restraint/restraintmdmodule.h"
126 #include "gromacs/restraint/restraintpotential.h"
127 #include "gromacs/swap/swapcoords.h"
128 #include "gromacs/taskassignment/decidegpuusage.h"
129 #include "gromacs/taskassignment/resourcedivision.h"
130 #include "gromacs/taskassignment/taskassignment.h"
131 #include "gromacs/taskassignment/usergpuids.h"
132 #include "gromacs/timing/gpu_timing.h"
133 #include "gromacs/timing/wallcycle.h"
134 #include "gromacs/timing/wallcyclereporting.h"
135 #include "gromacs/topology/mtop_util.h"
136 #include "gromacs/trajectory/trajectoryframe.h"
137 #include "gromacs/utility/basenetwork.h"
138 #include "gromacs/utility/cstringutil.h"
139 #include "gromacs/utility/exceptions.h"
140 #include "gromacs/utility/fatalerror.h"
141 #include "gromacs/utility/filestream.h"
142 #include "gromacs/utility/gmxassert.h"
143 #include "gromacs/utility/gmxmpi.h"
144 #include "gromacs/utility/keyvaluetree.h"
145 #include "gromacs/utility/logger.h"
146 #include "gromacs/utility/loggerbuilder.h"
147 #include "gromacs/utility/mdmodulenotification.h"
148 #include "gromacs/utility/physicalnodecommunicator.h"
149 #include "gromacs/utility/pleasecite.h"
150 #include "gromacs/utility/programcontext.h"
151 #include "gromacs/utility/smalloc.h"
152 #include "gromacs/utility/stringutil.h"
153
154 #include "isimulator.h"
155 #include "replicaexchange.h"
156 #include "simulatorbuilder.h"
157
158 #if GMX_FAHCORE
159 #include "corewrap.h"
160 #endif
161
162 namespace gmx
163 {
164
165 /*! \brief environment variable to enable GPU P2P communication */
166 static const bool c_enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr)
167     && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
168
169 /*! \brief environment variable to enable GPU buffer operations */
170 static const bool c_enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
171
172 /*! \brief Manage any development feature flag variables encountered
173  *
174  * The use of dev features indicated by environment variables is
175  * logged in order to ensure that runs with such featrues enabled can
176  * be identified from their log and standard output. Any cross
177  * dependencies are also checked, and if unsatisified, a fatal error
178  * issued.
179  *
180  * \param[in]  mdlog        Logger object.
181  */
182 static void manageDevelopmentFeatures(const gmx::MDLogger &mdlog)
183 {
184     const bool enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
185     const bool useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
186     const bool enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
187
188     if (enableGpuBufOps)
189     {
190         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
191                 "NOTE: This run uses the 'GPU buffer ops' feature, enabled by the GMX_USE_GPU_BUFFER_OPS environment variable.");
192     }
193
194     if (enableGpuHaloExchange)
195     {
196         if (!enableGpuBufOps)
197         {
198             gmx_fatal(FARGS, "Cannot enable GPU halo exchange without GPU buffer operations, set GMX_USE_GPU_BUFFER_OPS=1\n");
199         }
200     }
201
202     if (useGpuUpdateConstrain)
203     {
204         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
205                 "NOTE: This run uses the 'GPU update/constraints' feature, enabled by the GMX_UPDATE_CONSTRAIN_GPU environment variable.");
206     }
207
208 }
209
210 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
211  *
212  * Used to ensure that the master thread does not modify mdrunner during copy
213  * on the spawned threads. */
214 static void threadMpiMdrunnerAccessBarrier()
215 {
216 #if GMX_THREAD_MPI
217     MPI_Barrier(MPI_COMM_WORLD);
218 #endif
219 }
220
221 Mdrunner Mdrunner::cloneOnSpawnedThread() const
222 {
223     auto newRunner = Mdrunner(std::make_unique<MDModules>());
224
225     // All runners in the same process share a restraint manager resource because it is
226     // part of the interface to the client code, which is associated only with the
227     // original thread. Handles to the same resources can be obtained by copy.
228     {
229         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
230     }
231
232     // Copy original cr pointer before master thread can pass the thread barrier
233     newRunner.cr  = reinitialize_commrec_for_this_thread(cr);
234
235     // Copy members of master runner.
236     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
237     // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
238     newRunner.hw_opt    = hw_opt;
239     newRunner.filenames = filenames;
240
241     newRunner.oenv                = oenv;
242     newRunner.mdrunOptions        = mdrunOptions;
243     newRunner.domdecOptions       = domdecOptions;
244     newRunner.nbpu_opt            = nbpu_opt;
245     newRunner.pme_opt             = pme_opt;
246     newRunner.pme_fft_opt         = pme_fft_opt;
247     newRunner.bonded_opt          = bonded_opt;
248     newRunner.update_opt          = update_opt;
249     newRunner.nstlist_cmdline     = nstlist_cmdline;
250     newRunner.replExParams        = replExParams;
251     newRunner.pforce              = pforce;
252     newRunner.ms                  = ms;
253     newRunner.startingBehavior    = startingBehavior;
254     newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
255
256     threadMpiMdrunnerAccessBarrier();
257
258     GMX_RELEASE_ASSERT(!MASTER(newRunner.cr), "cloneOnSpawnedThread should only be called on spawned threads");
259
260     return newRunner;
261 }
262
263 /*! \brief The callback used for running on spawned threads.
264  *
265  * Obtains the pointer to the master mdrunner object from the one
266  * argument permitted to the thread-launch API call, copies it to make
267  * a new runner for this thread, reinitializes necessary data, and
268  * proceeds to the simulation. */
269 static void mdrunner_start_fn(const void *arg)
270 {
271     try
272     {
273         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
274         /* copy the arg list to make sure that it's thread-local. This
275            doesn't copy pointed-to items, of course; fnm, cr and fplog
276            are reset in the call below, all others should be const. */
277         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
278         mdrunner.mdrunner();
279     }
280     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
281 }
282
283
284 /*! \brief Start thread-MPI threads.
285  *
286  * Called by mdrunner() to start a specific number of threads
287  * (including the main thread) for thread-parallel runs. This in turn
288  * calls mdrunner() for each thread. All options are the same as for
289  * mdrunner(). */
290 t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
291 {
292
293     /* first check whether we even need to start tMPI */
294     if (numThreadsToLaunch < 2)
295     {
296         return cr;
297     }
298
299 #if GMX_THREAD_MPI
300     /* now spawn new threads that start mdrunner_start_fn(), while
301        the main thread returns. Thread affinity is handled later. */
302     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
303                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
304     {
305         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
306     }
307
308     threadMpiMdrunnerAccessBarrier();
309 #else
310     GMX_UNUSED_VALUE(mdrunner_start_fn);
311 #endif
312
313     return reinitialize_commrec_for_this_thread(cr);
314 }
315
316 }  // namespace gmx
317
318 /*! \brief Initialize variables for Verlet scheme simulation */
319 static void prepare_verlet_scheme(FILE                           *fplog,
320                                   t_commrec                      *cr,
321                                   t_inputrec                     *ir,
322                                   int                             nstlist_cmdline,
323                                   const gmx_mtop_t               *mtop,
324                                   const matrix                    box,
325                                   bool                            makeGpuPairList,
326                                   const gmx::CpuInfo             &cpuinfo)
327 {
328     /* For NVE simulations, we will retain the initial list buffer */
329     if (EI_DYNAMICS(ir->eI) &&
330         ir->verletbuf_tol > 0 &&
331         !(EI_MD(ir->eI) && ir->etc == etcNO))
332     {
333         /* Update the Verlet buffer size for the current run setup */
334
335         /* Here we assume SIMD-enabled kernels are being used. But as currently
336          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
337          * and 4x2 gives a larger buffer than 4x4, this is ok.
338          */
339         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
340         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
341
342         const real         rlist_new =
343             calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
344
345         if (rlist_new != ir->rlist)
346         {
347             if (fplog != nullptr)
348             {
349                 fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
350                         ir->rlist, rlist_new,
351                         listSetup.cluster_size_i, listSetup.cluster_size_j);
352             }
353             ir->rlist     = rlist_new;
354         }
355     }
356
357     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
358     {
359         gmx_fatal(FARGS, "Can not set nstlist without %s",
360                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
361     }
362
363     if (EI_DYNAMICS(ir->eI))
364     {
365         /* Set or try nstlist values */
366         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
367     }
368 }
369
370 /*! \brief Override the nslist value in inputrec
371  *
372  * with value passed on the command line (if any)
373  */
374 static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
375                                     int64_t              nsteps_cmdline,
376                                     t_inputrec          *ir)
377 {
378     assert(ir);
379
380     /* override with anything else than the default -2 */
381     if (nsteps_cmdline > -2)
382     {
383         char sbuf_steps[STEPSTRSIZE];
384         char sbuf_msg[STRLEN];
385
386         ir->nsteps = nsteps_cmdline;
387         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
388         {
389             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
390                     gmx_step_str(nsteps_cmdline, sbuf_steps),
391                     fabs(nsteps_cmdline*ir->delta_t));
392         }
393         else
394         {
395             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
396                     gmx_step_str(nsteps_cmdline, sbuf_steps));
397         }
398
399         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
400     }
401     else if (nsteps_cmdline < -2)
402     {
403         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64,
404                   nsteps_cmdline);
405     }
406     /* Do nothing if nsteps_cmdline == -2 */
407 }
408
409 namespace gmx
410 {
411
412 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
413  *
414  * If not, and if a warning may be issued, logs a warning about
415  * falling back to CPU code. With thread-MPI, only the first
416  * call to this function should have \c issueWarning true. */
417 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
418                                                const t_inputrec &ir,
419                                                bool              issueWarning)
420 {
421     bool        gpuIsUseful = true;
422     std::string warning;
423
424     if (ir.opts.ngener - ir.nwall > 1)
425     {
426         /* The GPU code does not support more than one energy group.
427          * If the user requested GPUs explicitly, a fatal error is given later.
428          */
429         gpuIsUseful = false;
430         warning     =
431             "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
432             "For better performance, run on the GPU without energy groups and then do "
433             "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
434     }
435
436     if (EI_TPI(ir.eI))
437     {
438         gpuIsUseful = false;
439         warning     = "TPI is not implemented for GPUs.";
440     }
441
442     if (!gpuIsUseful && issueWarning)
443     {
444         GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
445     }
446
447     return gpuIsUseful;
448 }
449
450 //! Initializes the logger for mdrun.
451 static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr)
452 {
453     gmx::LoggerBuilder builder;
454     if (fplog != nullptr)
455     {
456         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
457     }
458     if (cr == nullptr || SIMMASTER(cr))
459     {
460         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning,
461                                 &gmx::TextOutputFile::standardError());
462     }
463     return builder.build();
464 }
465
466 //! Make a TaskTarget from an mdrun argument string.
467 static TaskTarget findTaskTarget(const char *optionString)
468 {
469     TaskTarget returnValue = TaskTarget::Auto;
470
471     if (strncmp(optionString, "auto", 3) == 0)
472     {
473         returnValue = TaskTarget::Auto;
474     }
475     else if (strncmp(optionString, "cpu", 3) == 0)
476     {
477         returnValue = TaskTarget::Cpu;
478     }
479     else if (strncmp(optionString, "gpu", 3) == 0)
480     {
481         returnValue = TaskTarget::Gpu;
482     }
483     else
484     {
485         GMX_ASSERT(false, "Option string should have been checked for sanity already");
486     }
487
488     return returnValue;
489 }
490
491 //! Finish run, aggregate data to print performance info.
492 static void finish_run(FILE *fplog,
493                        const gmx::MDLogger &mdlog,
494                        const t_commrec *cr,
495                        const t_inputrec *inputrec,
496                        t_nrnb nrnb[], gmx_wallcycle_t wcycle,
497                        gmx_walltime_accounting_t walltime_accounting,
498                        nonbonded_verlet_t *nbv,
499                        const gmx_pme_t *pme,
500                        gmx_bool bWriteStat)
501 {
502     double  delta_t  = 0;
503     double  nbfs     = 0, mflop = 0;
504     double  elapsed_time,
505             elapsed_time_over_all_ranks,
506             elapsed_time_over_all_threads,
507             elapsed_time_over_all_threads_over_all_ranks;
508     /* Control whether it is valid to print a report. Only the
509        simulation master may print, but it should not do so if the run
510        terminated e.g. before a scheduled reset step. This is
511        complicated by the fact that PME ranks are unaware of the
512        reason why they were sent a pmerecvqxFINISH. To avoid
513        communication deadlocks, we always do the communication for the
514        report, even if we've decided not to write the report, because
515        how long it takes to finish the run is not important when we've
516        decided not to report on the simulation performance.
517
518        Further, we only report performance for dynamical integrators,
519        because those are the only ones for which we plan to
520        consider doing any optimizations. */
521     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
522
523     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
524     {
525         GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
526         printReport = false;
527     }
528
529     t_nrnb                  *nrnb_tot;
530     std::unique_ptr<t_nrnb>  nrnbTotalStorage;
531     if (cr->nnodes > 1)
532     {
533         nrnbTotalStorage = std::make_unique<t_nrnb>();
534         nrnb_tot         = nrnbTotalStorage.get();
535 #if GMX_MPI
536         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
537                       cr->mpi_comm_mysim);
538 #endif
539     }
540     else
541     {
542         nrnb_tot = nrnb;
543     }
544
545     elapsed_time                  = walltime_accounting_get_time_since_reset(walltime_accounting);
546     elapsed_time_over_all_threads = walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
547     if (cr->nnodes > 1)
548     {
549 #if GMX_MPI
550         /* reduce elapsed_time over all MPI ranks in the current simulation */
551         MPI_Allreduce(&elapsed_time,
552                       &elapsed_time_over_all_ranks,
553                       1, MPI_DOUBLE, MPI_SUM,
554                       cr->mpi_comm_mysim);
555         elapsed_time_over_all_ranks /= cr->nnodes;
556         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
557          * current simulation. */
558         MPI_Allreduce(&elapsed_time_over_all_threads,
559                       &elapsed_time_over_all_threads_over_all_ranks,
560                       1, MPI_DOUBLE, MPI_SUM,
561                       cr->mpi_comm_mysim);
562 #endif
563     }
564     else
565     {
566         elapsed_time_over_all_ranks                  = elapsed_time;
567         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
568     }
569
570     if (printReport)
571     {
572         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
573     }
574
575     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
576     {
577         print_dd_statistics(cr, inputrec, fplog);
578     }
579
580     /* TODO Move the responsibility for any scaling by thread counts
581      * to the code that handled the thread region, so that there's a
582      * mechanism to keep cycle counting working during the transition
583      * to task parallelism. */
584     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
585     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
586     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
587     auto cycle_sum(wallcycle_sum(cr, wcycle));
588
589     if (printReport)
590     {
591         auto                    nbnxn_gpu_timings = (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
592         gmx_wallclock_gpu_pme_t pme_gpu_timings   = {};
593
594         if (pme_gpu_task_enabled(pme))
595         {
596             pme_gpu_get_timings(pme, &pme_gpu_timings);
597         }
598         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
599                         elapsed_time_over_all_ranks,
600                         wcycle, cycle_sum,
601                         nbnxn_gpu_timings,
602                         &pme_gpu_timings);
603
604         if (EI_DYNAMICS(inputrec->eI))
605         {
606             delta_t = inputrec->delta_t;
607         }
608
609         if (fplog)
610         {
611             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
612                        elapsed_time_over_all_ranks,
613                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
614                        delta_t, nbfs, mflop);
615         }
616         if (bWriteStat)
617         {
618             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
619                        elapsed_time_over_all_ranks,
620                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
621                        delta_t, nbfs, mflop);
622         }
623     }
624 }
625
626 int Mdrunner::mdrunner()
627 {
628     matrix                    box;
629     t_forcerec               *fr               = nullptr;
630     t_fcdata                 *fcd              = nullptr;
631     real                      ewaldcoeff_q     = 0;
632     real                      ewaldcoeff_lj    = 0;
633     int                       nChargePerturbed = -1, nTypePerturbed = 0;
634     gmx_wallcycle_t           wcycle;
635     gmx_walltime_accounting_t walltime_accounting = nullptr;
636     gmx_membed_t *            membed              = nullptr;
637     gmx_hw_info_t            *hwinfo              = nullptr;
638
639     /* CAUTION: threads may be started later on in this function, so
640        cr doesn't reflect the final parallel state right now */
641     gmx_mtop_t                      mtop;
642
643     bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data());
644     bool doRerun  = mdrunOptions.rerun;
645
646     // Handle task-assignment related user options.
647     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
648                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
649
650     std::vector<int> userGpuTaskAssignment;
651     try
652     {
653         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
654     }
655     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
656     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
657     auto       pmeTarget       = findTaskTarget(pme_opt);
658     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
659     auto       bondedTarget    = findTaskTarget(bonded_opt);
660     auto       updateTarget    = TaskTarget::Cpu;
661     PmeRunMode pmeRunMode      = PmeRunMode::None;
662
663     // Here we assume that SIMMASTER(cr) does not change even after the
664     // threads are started.
665
666     FILE *fplog = nullptr;
667     // If we are appending, we don't write log output because we need
668     // to check that the old log file matches what the checkpoint file
669     // expects. Otherwise, we should start to write log output now if
670     // there is a file ready for it.
671     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
672     {
673         fplog = gmx_fio_getfp(logFileHandle);
674     }
675     gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
676     gmx::MDLogger    mdlog(logOwner.logger());
677
678     // report any development features that may be enabled by environment variables
679     manageDevelopmentFeatures(mdlog);
680
681     // With thread-MPI, the communicator changes after threads are
682     // launched, so this is rebuilt for the master rank at that
683     // time. The non-master ranks are fine to keep the one made here.
684     PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
685     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
686
687     gmx_print_detected_hardware(fplog, isMasterSimMasterRank(ms, MASTER(cr)), mdlog, hwinfo);
688
689     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
690
691     // Print citation requests after all software/hardware printing
692     pleaseCiteGromacs(fplog);
693
694     // TODO Replace this by unique_ptr once t_inputrec is C++
695     t_inputrec               inputrecInstance;
696     t_inputrec              *inputrec = nullptr;
697     std::unique_ptr<t_state> globalState;
698
699     auto                     partialDeserializedTpr = std::make_unique<PartialDeserializedTprFile>();
700
701     if (SIMMASTER(cr))
702     {
703         /* Only the master rank has the global state */
704         globalState = std::make_unique<t_state>();
705
706         /* Read (nearly) all data required for the simulation
707          * and keep the partly serialized tpr contents to send to other ranks later
708          */
709         *partialDeserializedTpr = read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), &inputrecInstance, globalState.get(), &mtop);
710         inputrec                = &inputrecInstance;
711     }
712
713     /* Check and update the hardware options for internal consistency */
714     checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks, inputrec);
715
716     if (GMX_THREAD_MPI && SIMMASTER(cr))
717     {
718         bool useGpuForNonbonded = false;
719         bool useGpuForPme       = false;
720         try
721         {
722             GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
723
724             // If the user specified the number of ranks, then we must
725             // respect that, but in default mode, we need to allow for
726             // the number of GPUs to choose the number of ranks.
727             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
728             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
729                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
730                     canUseGpuForNonbonded,
731                     gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
732                     hw_opt.nthreads_tmpi);
733             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
734                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
735                     *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
736
737         }
738         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
739
740         /* Determine how many thread-MPI ranks to start.
741          *
742          * TODO Over-writing the user-supplied value here does
743          * prevent any possible subsequent checks from working
744          * correctly. */
745         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
746                                                 &hw_opt,
747                                                 gpuIdsToUse,
748                                                 useGpuForNonbonded,
749                                                 useGpuForPme,
750                                                 inputrec, &mtop,
751                                                 mdlog,
752                                                 doMembed);
753
754         // Now start the threads for thread MPI.
755         cr = spawnThreads(hw_opt.nthreads_tmpi);
756         /* The main thread continues here with a new cr. We don't deallocate
757            the old cr because other threads may still be reading it. */
758         // TODO Both master and spawned threads call dup_tfn and
759         // reinitialize_commrec_for_this_thread. Find a way to express
760         // this better.
761         physicalNodeComm = PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
762     }
763     // END OF CAUTION: cr and physicalNodeComm are now reliable
764
765     if (PAR(cr))
766     {
767         /* now broadcast everything to the non-master nodes/threads: */
768         if (!SIMMASTER(cr))
769         {
770             inputrec = &inputrecInstance;
771         }
772         init_parallel(cr, inputrec, &mtop, partialDeserializedTpr.get());
773     }
774     GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
775     partialDeserializedTpr.reset(nullptr);
776
777     // Now each rank knows the inputrec that SIMMASTER read and used,
778     // and (if applicable) cr->nnodes has been assigned the number of
779     // thread-MPI ranks that have been chosen. The ranks can now all
780     // run the task-deciding functions and will agree on the result
781     // without needing to communicate.
782     //
783     // TODO Should we do the communication in debug mode to support
784     // having an assertion?
785     //
786     // Note that these variables describe only their own node.
787     //
788     // Note that when bonded interactions run on a GPU they always run
789     // alongside a nonbonded task, so do not influence task assignment
790     // even though they affect the force calculation workload.
791     bool useGpuForNonbonded     = false;
792     bool useGpuForPme           = false;
793     bool useGpuForBonded        = false;
794     bool useGpuForUpdate        = false;
795     bool gpusWereDetected       = hwinfo->ngpu_compatible_tot > 0;
796     try
797     {
798         // It's possible that there are different numbers of GPUs on
799         // different nodes, which is the user's responsibilty to
800         // handle. If unsuitable, we will notice that during task
801         // assignment.
802         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
803         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
804                                                                 emulateGpuNonbonded,
805                                                                 canUseGpuForNonbonded,
806                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI),
807                                                                 gpusWereDetected);
808         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
809                                                     *hwinfo, *inputrec, mtop,
810                                                     cr->nnodes, domdecOptions.numPmeRanks,
811                                                     gpusWereDetected);
812         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
813         useGpuForBonded =
814             decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
815                                             bondedTarget, canUseGpuForBonded,
816                                             EVDW_PME(inputrec->vdwtype),
817                                             EEL_PME_EWALD(inputrec->coulombtype),
818                                             domdecOptions.numPmeRanks, gpusWereDetected);
819
820         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
821         if (pmeRunMode == PmeRunMode::GPU)
822         {
823             if (pmeFftTarget == TaskTarget::Cpu)
824             {
825                 pmeRunMode = PmeRunMode::Mixed;
826             }
827         }
828         else if (pmeFftTarget == TaskTarget::Gpu)
829         {
830             gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft.");
831         }
832     }
833     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
834
835     // Build restraints.
836     // TODO: hide restraint implementation details from Mdrunner.
837     // There is nothing unique about restraints at this point as far as the
838     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
839     // factory functions from the SimulationContext on which to call mdModules_->add().
840     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
841     for (auto && restraint : restraintManager_->getRestraints())
842     {
843         auto module = RestraintMDModule::create(restraint,
844                                                 restraint->sites());
845         mdModules_->add(std::move(module));
846     }
847
848     // TODO: Error handling
849     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
850     const auto &mdModulesNotifier = mdModules_->notifier().notifier_;
851
852     if (inputrec->internalParameters != nullptr)
853     {
854         mdModulesNotifier.notify(*inputrec->internalParameters);
855     }
856
857     if (fplog != nullptr)
858     {
859         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
860         fprintf(fplog, "\n");
861     }
862
863     if (SIMMASTER(cr))
864     {
865         /* In rerun, set velocities to zero if present */
866         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
867         {
868             // rerun does not use velocities
869             GMX_LOG(mdlog.info).asParagraph().appendText(
870                     "Rerun trajectory contains velocities. Rerun does only evaluate "
871                     "potential energy and forces. The velocities will be ignored.");
872             for (int i = 0; i < globalState->natoms; i++)
873             {
874                 clear_rvec(globalState->v[i]);
875             }
876             globalState->flags &= ~(1 << estV);
877         }
878
879         /* now make sure the state is initialized and propagated */
880         set_state_entries(globalState.get(), inputrec);
881     }
882
883     /* NM and TPI parallelize over force/energy calculations, not atoms,
884      * so we need to initialize and broadcast the global state.
885      */
886     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
887     {
888         if (!MASTER(cr))
889         {
890             globalState = std::make_unique<t_state>();
891         }
892         broadcastStateWithoutDynamics(cr, globalState.get());
893     }
894
895     /* A parallel command line option consistency check that we can
896        only do after any threads have started. */
897     if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 ||
898                      domdecOptions.numCells[YY] > 1 ||
899                      domdecOptions.numCells[ZZ] > 1 ||
900                      domdecOptions.numPmeRanks > 0))
901     {
902         gmx_fatal(FARGS,
903                   "The -dd or -npme option request a parallel simulation, "
904 #if !GMX_MPI
905                   "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
906 #elif GMX_THREAD_MPI
907                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
908 #else
909                   "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
910 #endif
911     }
912
913     if (doRerun &&
914         (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
915     {
916         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
917     }
918
919     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
920     {
921         if (domdecOptions.numPmeRanks > 0)
922         {
923             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
924                                  "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
925         }
926
927         domdecOptions.numPmeRanks = 0;
928     }
929
930     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
931     {
932         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
933          * improve performance with many threads per GPU, since our OpenMP
934          * scaling is bad, but it's difficult to automate the setup.
935          */
936         domdecOptions.numPmeRanks = 0;
937     }
938     if (useGpuForPme)
939     {
940         if (domdecOptions.numPmeRanks < 0)
941         {
942             domdecOptions.numPmeRanks = 0;
943             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
944         }
945         else
946         {
947             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
948         }
949     }
950
951 #if GMX_FAHCORE
952     if (MASTER(cr))
953     {
954         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
955     }
956 #endif
957
958     /* NMR restraints must be initialized before load_checkpoint,
959      * since with time averaging the history is added to t_state.
960      * For proper consistency check we therefore need to extend
961      * t_state here.
962      * So the PME-only nodes (if present) will also initialize
963      * the distance restraints.
964      */
965     snew(fcd, 1);
966
967     /* This needs to be called before read_checkpoint to extend the state */
968     init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
969
970     init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
971
972     auto                 deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
973
974     ObservablesHistory   observablesHistory = {};
975
976     if (startingBehavior != StartingBehavior::NewSimulation)
977     {
978         /* Check if checkpoint file exists before doing continuation.
979          * This way we can use identical input options for the first and subsequent runs...
980          */
981         if (mdrunOptions.numStepsCommandline > -2)
982         {
983             /* Temporarily set the number of steps to unmlimited to avoid
984              * triggering the nsteps check in load_checkpoint().
985              * This hack will go away soon when the -nsteps option is removed.
986              */
987             inputrec->nsteps = -1;
988         }
989
990         load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
991                         logFileHandle,
992                         cr, domdecOptions.numCells,
993                         inputrec, globalState.get(),
994                         &observablesHistory,
995                         mdrunOptions.reproducible, mdModules_->notifier());
996
997         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
998         {
999             // Now we can start normal logging to the truncated log file.
1000             fplog    = gmx_fio_getfp(logFileHandle);
1001             prepareLogAppending(fplog);
1002             logOwner = buildLogger(fplog, cr);
1003             mdlog    = logOwner.logger();
1004         }
1005     }
1006
1007     if (mdrunOptions.numStepsCommandline > -2)
1008     {
1009         GMX_LOG(mdlog.info).asParagraph().
1010             appendText("The -nsteps functionality is deprecated, and may be removed in a future version. "
1011                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field.");
1012     }
1013     /* override nsteps with value set on the commamdline */
1014     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
1015
1016     if (SIMMASTER(cr))
1017     {
1018         copy_mat(globalState->box, box);
1019     }
1020
1021     if (PAR(cr))
1022     {
1023         gmx_bcast(sizeof(box), box, cr);
1024     }
1025
1026     if (inputrec->cutoff_scheme != ecutsVERLET)
1027     {
1028         gmx_fatal(FARGS, "This group-scheme .tpr file can no longer be run by mdrun. Please update to the Verlet scheme, or use an earlier version of GROMACS if necessary.");
1029     }
1030     /* Update rlist and nstlist. */
1031     prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
1032                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
1033
1034     const bool          prefer1DAnd1PulseDD = (c_enableGpuHaloExchange && useGpuForNonbonded);
1035     LocalAtomSetManager atomSets;
1036     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
1037                      inputrec->eI == eiNM))
1038     {
1039         cr->dd = init_domain_decomposition(mdlog, cr, domdecOptions, mdrunOptions,
1040                                            prefer1DAnd1PulseDD,
1041                                            &mtop, inputrec,
1042                                            box, positionsFromStatePointer(globalState.get()),
1043                                            &atomSets);
1044         // Note that local state still does not exist yet.
1045     }
1046     else
1047     {
1048         /* PME, if used, is done on all nodes with 1D decomposition */
1049         cr->npmenodes = 0;
1050         cr->duty      = (DUTY_PP | DUTY_PME);
1051
1052         if (inputrec->ePBC == epbcSCREW)
1053         {
1054             gmx_fatal(FARGS,
1055                       "pbc=screw is only implemented with domain decomposition");
1056         }
1057     }
1058
1059     if (PAR(cr))
1060     {
1061         /* After possible communicator splitting in make_dd_communicators.
1062          * we can set up the intra/inter node communication.
1063          */
1064         gmx_setup_nodecomm(fplog, cr);
1065     }
1066
1067 #if GMX_MPI
1068     if (isMultiSim(ms))
1069     {
1070         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1071                 "This is simulation %d out of %d running as a composite GROMACS\n"
1072                 "multi-simulation job. Setup for this simulation:\n",
1073                 ms->sim, ms->nsim);
1074     }
1075     GMX_LOG(mdlog.warning).appendTextFormatted(
1076             "Using %d MPI %s\n",
1077             cr->nnodes,
1078 #if GMX_THREAD_MPI
1079             cr->nnodes == 1 ? "thread" : "threads"
1080 #else
1081             cr->nnodes == 1 ? "process" : "processes"
1082 #endif
1083             );
1084     fflush(stderr);
1085 #endif
1086
1087     // If mdrun -pin auto honors any affinity setting that already
1088     // exists. If so, it is nice to provide feedback about whether
1089     // that existing affinity setting was from OpenMP or something
1090     // else, so we run this code both before and after we initialize
1091     // the OpenMP support.
1092     gmx_check_thread_affinity_set(mdlog,
1093                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
1094     /* Check and update the number of OpenMP threads requested */
1095     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1096                                             pmeRunMode, mtop, *inputrec);
1097
1098     gmx_omp_nthreads_init(mdlog, cr,
1099                           hwinfo->nthreads_hw_avail,
1100                           physicalNodeComm.size_,
1101                           hw_opt.nthreads_omp,
1102                           hw_opt.nthreads_omp_pme,
1103                           !thisRankHasDuty(cr, DUTY_PP));
1104
1105     // Enable FP exception detection, but not in
1106     // Release mode and not for compilers with known buggy FP
1107     // exception support (clang with any optimization) or suspected
1108     // buggy FP exception support (gcc 7.* with optimization).
1109 #if !defined NDEBUG && \
1110     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1111     && defined __OPTIMIZE__)
1112     const bool bEnableFPE = true;
1113 #else
1114     const bool bEnableFPE = false;
1115 #endif
1116     //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1117     if (bEnableFPE)
1118     {
1119         gmx_feenableexcept();
1120     }
1121
1122     // TODO This could move before init_domain_decomposition() as part
1123     // of refactoring that separates the responsibility for duty
1124     // assignment from setup for communication between tasks, and
1125     // setup for tasks handled with a domain (ie including short-ranged
1126     // tasks, bonded tasks, etc.).
1127     //
1128     // Note that in general useGpuForNonbonded, etc. can have a value
1129     // that is inconsistent with the presence of actual GPUs on any
1130     // rank, and that is not known to be a problem until the
1131     // duty of the ranks on a node become known.
1132
1133     // Produce the task assignment for this rank.
1134     GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
1135     GpuTaskAssignments        gpuTaskAssignments =
1136         gpuTaskAssignmentsBuilder.build(gpuIdsToUse,
1137                                         userGpuTaskAssignment,
1138                                         *hwinfo,
1139                                         cr,
1140                                         ms,
1141                                         physicalNodeComm,
1142                                         nonbondedTarget,
1143                                         pmeTarget,
1144                                         bondedTarget,
1145                                         updateTarget,
1146                                         useGpuForNonbonded,
1147                                         useGpuForPme,
1148                                         thisRankHasDuty(cr, DUTY_PP),
1149                                         // TODO cr->duty & DUTY_PME should imply that a PME
1150                                         // algorithm is active, but currently does not.
1151                                         EEL_PME(inputrec->coulombtype) &&
1152                                         thisRankHasDuty(cr, DUTY_PME));
1153
1154     const bool printHostName = (cr->nnodes > 1);
1155     gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode);
1156
1157     // If the user chose a task assignment, give them some hints
1158     // where appropriate.
1159     if (!userGpuTaskAssignment.empty())
1160     {
1161         gpuTaskAssignments.logPerformanceHints(mdlog,
1162                                                ssize(gpuIdsToUse));
1163     }
1164
1165     /* Now that we know the setup is consistent, check for efficiency */
1166     check_resource_division_efficiency(hwinfo,
1167                                        gpuTaskAssignments.thisRankHasAnyGpuTask(),
1168                                        mdrunOptions.ntompOptionIsSet,
1169                                        cr,
1170                                        mdlog);
1171
1172     // Get the device handles for the modules, nullptr when no task is assigned.
1173     gmx_device_info_t *nonbondedDeviceInfo   = gpuTaskAssignments.initNonbondedDevice(cr);
1174     gmx_device_info_t *pmeDeviceInfo         = gpuTaskAssignments.initPmeDevice();
1175     const bool         thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
1176
1177     // TODO should live in ewald module once its testing is improved
1178     //
1179     // Later, this program could contain kernels that might be later
1180     // re-used as auto-tuning progresses, or subsequent simulations
1181     // are invoked.
1182     PmeGpuProgramStorage pmeGpuProgram;
1183     if (thisRankHasPmeGpuTask)
1184     {
1185         pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
1186     }
1187
1188     /* getting number of PP/PME threads on this MPI / tMPI rank.
1189        PME: env variable should be read only on one node to make sure it is
1190        identical everywhere;
1191      */
1192     const int numThreadsOnThisRank =
1193         thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) : gmx_omp_nthreads_get(emntPME);
1194     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
1195                                   *hwinfo->hardwareTopology,
1196                                   physicalNodeComm, mdlog);
1197
1198     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1199     {
1200         /* Before setting affinity, check whether the affinity has changed
1201          * - which indicates that probably the OpenMP library has changed it
1202          * since we first checked).
1203          */
1204         gmx_check_thread_affinity_set(mdlog,
1205                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1206
1207         int numThreadsOnThisNode, intraNodeThreadOffset;
1208         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1209                                  &intraNodeThreadOffset);
1210
1211         /* Set the CPU affinity */
1212         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
1213                                 numThreadsOnThisRank, numThreadsOnThisNode,
1214                                 intraNodeThreadOffset, nullptr);
1215     }
1216
1217     if (mdrunOptions.timingOptions.resetStep > -1)
1218     {
1219         GMX_LOG(mdlog.info).asParagraph().
1220             appendText("The -resetstep functionality is deprecated, and may be removed in a future version.");
1221     }
1222     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1223
1224     if (PAR(cr))
1225     {
1226         /* Master synchronizes its value of reset_counters with all nodes
1227          * including PME only nodes */
1228         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1229         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
1230         wcycle_set_reset_counters(wcycle, reset_counters);
1231     }
1232
1233     // Membrane embedding must be initialized before we call init_forcerec()
1234     if (doMembed)
1235     {
1236         if (MASTER(cr))
1237         {
1238             fprintf(stderr, "Initializing membed");
1239         }
1240         /* Note that membed cannot work in parallel because mtop is
1241          * changed here. Fix this if we ever want to make it run with
1242          * multiple ranks. */
1243         membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr,
1244                              &mdrunOptions
1245                                  .checkpointOptions.period);
1246     }
1247
1248     std::unique_ptr<MDAtoms>     mdAtoms;
1249     std::unique_ptr<gmx_vsite_t> vsite;
1250
1251     t_nrnb nrnb;
1252     if (thisRankHasDuty(cr, DUTY_PP))
1253     {
1254         mdModulesNotifier.notify(*cr);
1255         mdModulesNotifier.notify(&atomSets);
1256         mdModulesNotifier.notify(PeriodicBoundaryConditionType {inputrec->ePBC});
1257         /* Initiate forcerecord */
1258         fr                 = new t_forcerec;
1259         fr->forceProviders = mdModules_->initForceProviders();
1260         init_forcerec(fplog, mdlog, fr, fcd,
1261                       inputrec, &mtop, cr, box,
1262                       opt2fn("-table", filenames.size(), filenames.data()),
1263                       opt2fn("-tablep", filenames.size(), filenames.data()),
1264                       opt2fns("-tableb", filenames.size(), filenames.data()),
1265                       *hwinfo, nonbondedDeviceInfo,
1266                       useGpuForBonded,
1267                       pforce,
1268                       wcycle);
1269
1270         // TODO Move this to happen during domain decomposition setup,
1271         // once stream and event handling works well with that.
1272         // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
1273         if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
1274         {
1275             GMX_RELEASE_ASSERT(c_enableGpuBufOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
1276             void *streamLocal                   = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1277             void *streamNonLocal                =
1278                 Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1279             void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
1280             GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1281                     "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the GMX_GPU_DD_COMMS environment variable.");
1282             cr->dd->gpuHaloExchange = std::make_unique<GpuHaloExchange>(cr->dd, cr->mpi_comm_mysim, streamLocal,
1283                                                                         streamNonLocal, coordinatesOnDeviceEvent);
1284         }
1285
1286         /* Initialize the mdAtoms structure.
1287          * mdAtoms is not filled with atom data,
1288          * as this can not be done now with domain decomposition.
1289          */
1290         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1291         if (globalState && thisRankHasPmeGpuTask)
1292         {
1293             // The pinning of coordinates in the global state object works, because we only use
1294             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1295             // points to the global state object without DD.
1296             // FIXME: MD and EM separately set up the local state - this should happen in the same function,
1297             // which should also perform the pinning.
1298             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1299         }
1300
1301         /* Initialize the virtual site communication */
1302         vsite = initVsite(mtop, cr);
1303
1304         calc_shifts(box, fr->shift_vec);
1305
1306         /* With periodic molecules the charge groups should be whole at start up
1307          * and the virtual sites should not be far from their proper positions.
1308          */
1309         if (!inputrec->bContinuation && MASTER(cr) &&
1310             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
1311         {
1312             /* Make molecules whole at start of run */
1313             if (fr->ePBC != epbcNONE)
1314             {
1315                 do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
1316             }
1317             if (vsite)
1318             {
1319                 /* Correct initial vsite positions are required
1320                  * for the initial distribution in the domain decomposition
1321                  * and for the initial shell prediction.
1322                  */
1323                 constructVsitesGlobal(mtop, globalState->x);
1324             }
1325         }
1326
1327         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1328         {
1329             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1330             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1331         }
1332     }
1333     else
1334     {
1335         /* This is a PME only node */
1336
1337         GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized");
1338
1339         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1340         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1341     }
1342
1343     gmx_pme_t *sepPmeData = nullptr;
1344     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1345     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
1346     gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
1347
1348     /* Initiate PME if necessary,
1349      * either on all nodes or on dedicated PME nodes only. */
1350     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1351     {
1352         if (mdAtoms && mdAtoms->mdatoms())
1353         {
1354             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1355             if (EVDW_PME(inputrec->vdwtype))
1356             {
1357                 nTypePerturbed   = mdAtoms->mdatoms()->nTypePerturbed;
1358             }
1359         }
1360         if (cr->npmenodes > 0)
1361         {
1362             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1363             gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
1364             gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
1365         }
1366
1367         if (thisRankHasDuty(cr, DUTY_PME))
1368         {
1369             try
1370             {
1371                 pmedata = gmx_pme_init(cr,
1372                                        getNumPmeDomains(cr->dd),
1373                                        inputrec,
1374                                        nChargePerturbed != 0, nTypePerturbed != 0,
1375                                        mdrunOptions.reproducible,
1376                                        ewaldcoeff_q, ewaldcoeff_lj,
1377                                        gmx_omp_nthreads_get(emntPME),
1378                                        pmeRunMode, nullptr,
1379                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
1380             }
1381             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1382         }
1383     }
1384
1385
1386     if (EI_DYNAMICS(inputrec->eI))
1387     {
1388         /* Turn on signal handling on all nodes */
1389         /*
1390          * (A user signal from the PME nodes (if any)
1391          * is communicated to the PP nodes.
1392          */
1393         signal_handler_install();
1394     }
1395
1396     pull_t *pull_work = nullptr;
1397     if (thisRankHasDuty(cr, DUTY_PP))
1398     {
1399         /* Assumes uniform use of the number of OpenMP threads */
1400         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1401
1402         if (inputrec->bPull)
1403         {
1404             /* Initialize pull code */
1405             pull_work =
1406                 init_pull(fplog, inputrec->pull, inputrec,
1407                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
1408             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1409             {
1410                 initPullHistory(pull_work, &observablesHistory);
1411             }
1412             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1413             {
1414                 init_pull_output_files(pull_work,
1415                                        filenames.size(), filenames.data(), oenv,
1416                                        startingBehavior);
1417             }
1418         }
1419
1420         std::unique_ptr<EnforcedRotation> enforcedRotation;
1421         if (inputrec->bRot)
1422         {
1423             /* Initialize enforced rotation code */
1424             enforcedRotation = init_rot(fplog,
1425                                         inputrec,
1426                                         filenames.size(),
1427                                         filenames.data(),
1428                                         cr,
1429                                         &atomSets,
1430                                         globalState.get(),
1431                                         &mtop,
1432                                         oenv,
1433                                         mdrunOptions,
1434                                         startingBehavior);
1435         }
1436
1437         t_swap *swap = nullptr;
1438         if (inputrec->eSwapCoords != eswapNO)
1439         {
1440             /* Initialize ion swapping code */
1441             swap = init_swapcoords(fplog, inputrec,
1442                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1443                                    &mtop, globalState.get(), &observablesHistory,
1444                                    cr, &atomSets, oenv, mdrunOptions,
1445                                    startingBehavior);
1446         }
1447
1448         /* Let makeConstraints know whether we have essential dynamics constraints.
1449          * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint
1450          */
1451         bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr
1452                                     || observablesHistory.edsamHistory);
1453         auto constr              = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics,
1454                                                    fplog, *mdAtoms->mdatoms(),
1455                                                    cr, ms, &nrnb, wcycle, fr->bMolPBC);
1456
1457         /* Energy terms and groups */
1458         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), inputrec->fepvals->n_lambda);
1459
1460         /* Kinetic energy data */
1461         gmx_ekindata_t ekind;
1462         init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
1463
1464         /* Set up interactive MD (IMD) */
1465         auto imdSession = makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
1466                                          MASTER(cr) ? globalState->x.rvec_array() : nullptr,
1467                                          filenames.size(), filenames.data(), oenv, mdrunOptions.imdOptions,
1468                                          startingBehavior);
1469
1470         if (DOMAINDECOMP(cr))
1471         {
1472             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1473             /* This call is not included in init_domain_decomposition mainly
1474              * because fr->cginfo_mb is set later.
1475              */
1476             dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
1477                             domdecOptions.checkBondedInteractions,
1478                             fr->cginfo_mb);
1479         }
1480
1481         if (updateTarget == TaskTarget::Gpu)
1482         {
1483             if (SIMMASTER(cr))
1484             {
1485                 gmx_fatal(FARGS, "It is currently not possible to redirect the calculation "
1486                           "of update and constraints to the GPU!");
1487             }
1488         }
1489
1490         // Before we start the actual simulator, try if we can run the update task on the GPU.
1491         useGpuForUpdate = decideWhetherToUseGpuForUpdate(DOMAINDECOMP(cr),
1492                                                          useGpuForPme,
1493                                                          useGpuForNonbonded,
1494                                                          c_enableGpuBufOps,
1495                                                          updateTarget,
1496                                                          gpusWereDetected,
1497                                                          *inputrec,
1498                                                          *mdAtoms,
1499                                                          doEssentialDynamics,
1500                                                          fcd->orires.nr != 0,
1501                                                          fcd->disres.nsystems != 0);
1502
1503         // TODO This is not the right place to manage the lifetime of
1504         // this data structure, but currently it's the easiest way to
1505         // make it work.
1506         MdrunScheduleWorkload runScheduleWork;
1507
1508         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1509         SimulatorBuilder simulatorBuilder;
1510
1511         // build and run simulator object based on user-input
1512         const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
1513                     false,
1514                     inputrec, doRerun, vsite.get(), ms, replExParams,
1515                     fcd, static_cast<int>(filenames.size()), filenames.data(),
1516                     &observablesHistory, membed);
1517         auto simulator = simulatorBuilder.build(
1518                     inputIsCompatibleWithModularSimulator,
1519                     fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(),
1520                     oenv,
1521                     mdrunOptions,
1522                     startingBehavior,
1523                     vsite.get(), constr.get(),
1524                     enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1525                     deform.get(),
1526                     mdModules_->outputProvider(),
1527                     mdModules_->notifier(),
1528                     inputrec, imdSession.get(), pull_work, swap, &mtop,
1529                     fcd,
1530                     globalState.get(),
1531                     &observablesHistory,
1532                     mdAtoms.get(), &nrnb, wcycle, fr,
1533                     &enerd,
1534                     &ekind,
1535                     &runScheduleWork,
1536                     replExParams,
1537                     membed,
1538                     walltime_accounting,
1539                     std::move(stopHandlerBuilder_),
1540                     doRerun,
1541                     useGpuForUpdate);
1542         simulator->run();
1543
1544         if (inputrec->bPull)
1545         {
1546             finish_pull(pull_work);
1547         }
1548         finish_swapcoords(swap);
1549     }
1550     else
1551     {
1552         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1553         /* do PME only */
1554         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1555         gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
1556     }
1557
1558     wallcycle_stop(wcycle, ewcRUN);
1559
1560     /* Finish up, write some stuff
1561      * if rerunMD, don't write last frame again
1562      */
1563     finish_run(fplog, mdlog, cr,
1564                inputrec, &nrnb, wcycle, walltime_accounting,
1565                fr ? fr->nbv.get() : nullptr,
1566                pmedata,
1567                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1568
1569     // clean up cycle counter
1570     wallcycle_destroy(wcycle);
1571
1572 // Free PME data
1573     if (pmedata)
1574     {
1575         gmx_pme_destroy(pmedata);
1576         pmedata = nullptr;
1577     }
1578
1579     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1580     // before we destroy the GPU context(s) in free_gpu_resources().
1581     // Pinned buffers are associated with contexts in CUDA.
1582     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1583     mdAtoms.reset(nullptr);
1584     globalState.reset(nullptr);
1585     mdModules_.reset(nullptr);   // destruct force providers here as they might also use the GPU
1586
1587     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
1588     free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
1589     free_gpu(nonbondedDeviceInfo);
1590     free_gpu(pmeDeviceInfo);
1591     done_forcerec(fr, mtop.molblock.size());
1592     sfree(fcd);
1593
1594     if (doMembed)
1595     {
1596         free_membed(membed);
1597     }
1598
1599     /* Does what it says */
1600     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1601     walltime_accounting_destroy(walltime_accounting);
1602
1603     // Ensure log file content is written
1604     if (logFileHandle)
1605     {
1606         gmx_fio_flush(logFileHandle);
1607     }
1608
1609     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1610      * exceptions were enabled before function was called. */
1611     if (bEnableFPE)
1612     {
1613         gmx_fedisableexcept();
1614     }
1615
1616     auto rc = static_cast<int>(gmx_get_stop_condition());
1617
1618 #if GMX_THREAD_MPI
1619     /* we need to join all threads. The sub-threads join when they
1620        exit this function, but the master thread needs to be told to
1621        wait for that. */
1622     if (PAR(cr) && MASTER(cr))
1623     {
1624         tMPI_Finalize();
1625     }
1626 #endif
1627     return rc;
1628 }
1629
1630 Mdrunner::~Mdrunner()
1631 {
1632     // Clean up of the Manager.
1633     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1634     // but okay as long as threads synchronize some time before adding or accessing
1635     // a new set of restraints.
1636     if (restraintManager_)
1637     {
1638         restraintManager_->clear();
1639         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1640                    "restraints added during runner life time should be cleared at runner destruction.");
1641     }
1642 };
1643
1644 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller,
1645                             const std::string                        &name)
1646 {
1647     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1648     // Not sure if this should be logged through the md logger or something else,
1649     // but it is helpful to have some sort of INFO level message sent somewhere.
1650     //    std::cout << "Registering restraint named " << name << std::endl;
1651
1652     // When multiple restraints are used, it may be wasteful to register them separately.
1653     // Maybe instead register an entire Restraint Manager as a force provider.
1654     restraintManager_->addToSpec(std::move(puller),
1655                                  name);
1656 }
1657
1658 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules)
1659     : mdModules_(std::move(mdModules))
1660 {
1661 }
1662
1663 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
1664
1665 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
1666 Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
1667
1668 class Mdrunner::BuilderImplementation
1669 {
1670     public:
1671         BuilderImplementation() = delete;
1672         BuilderImplementation(std::unique_ptr<MDModules> mdModules);
1673         ~BuilderImplementation();
1674
1675         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
1676                                                     real                forceWarningThreshold,
1677                                                     StartingBehavior    startingBehavior);
1678
1679         void addDomdec(const DomdecOptions &options);
1680
1681         void addVerletList(int nstlist);
1682
1683         void addReplicaExchange(const ReplicaExchangeParameters &params);
1684
1685         void addMultiSim(gmx_multisim_t* multisim);
1686
1687         void addCommunicationRecord(t_commrec *cr);
1688
1689         void addNonBonded(const char* nbpu_opt);
1690
1691         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1692
1693         void addBondedTaskAssignment(const char* bonded_opt);
1694
1695         void addUpdateTaskAssignment(const char* update_opt);
1696
1697         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
1698
1699         void addFilenames(ArrayRef <const t_filenm> filenames);
1700
1701         void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1702
1703         void addLogFile(t_fileio *logFileHandle);
1704
1705         void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1706
1707         Mdrunner build();
1708
1709     private:
1710
1711         // Default parameters copied from runner.h
1712         // \todo Clarify source(s) of default parameters.
1713
1714         const char* nbpu_opt_          = nullptr;
1715         const char* pme_opt_           = nullptr;
1716         const char* pme_fft_opt_       = nullptr;
1717         const char *bonded_opt_        = nullptr;
1718         const char *update_opt_        = nullptr;
1719
1720         MdrunOptions                          mdrunOptions_;
1721
1722         DomdecOptions                         domdecOptions_;
1723
1724         ReplicaExchangeParameters             replicaExchangeParameters_;
1725
1726         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1727         int         nstlist_ = 0;
1728
1729         //! Multisim communicator handle.
1730         std::unique_ptr<gmx_multisim_t*>      multisim_ = nullptr;
1731
1732         //! Non-owning communication record (only used when building command-line mdrun)
1733         t_commrec *communicationRecord_ = nullptr;
1734
1735         //! Print a warning if any force is larger than this (in kJ/mol nm).
1736         real forceWarningThreshold_ = -1;
1737
1738         //! Whether the simulation will start afresh, or restart with/without appending.
1739         StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1740
1741         //! The modules that comprise the functionality of mdrun.
1742         std::unique_ptr<MDModules> mdModules_;
1743
1744         //! \brief Parallelism information.
1745         gmx_hw_opt_t hardwareOptions_;
1746
1747         //! filename options for simulation.
1748         ArrayRef<const t_filenm> filenames_;
1749
1750         /*! \brief Handle to output environment.
1751          *
1752          * \todo gmx_output_env_t needs lifetime management.
1753          */
1754         gmx_output_env_t*    outputEnvironment_ = nullptr;
1755
1756         /*! \brief Non-owning handle to MD log file.
1757          *
1758          * \todo Context should own output facilities for client.
1759          * \todo Improve log file handle management.
1760          * \internal
1761          * Code managing the FILE* relies on the ability to set it to
1762          * nullptr to check whether the filehandle is valid.
1763          */
1764         t_fileio* logFileHandle_ = nullptr;
1765
1766         /*!
1767          * \brief Builder for simulation stop signal handler.
1768          */
1769         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1770 };
1771
1772 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules) :
1773     mdModules_(std::move(mdModules))
1774 {
1775 }
1776
1777 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
1778
1779 Mdrunner::BuilderImplementation &
1780 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions    &options,
1781                                                       const real             forceWarningThreshold,
1782                                                       const StartingBehavior startingBehavior)
1783 {
1784     mdrunOptions_          = options;
1785     forceWarningThreshold_ = forceWarningThreshold;
1786     startingBehavior_      = startingBehavior;
1787     return *this;
1788 }
1789
1790 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options)
1791 {
1792     domdecOptions_ = options;
1793 }
1794
1795 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
1796 {
1797     nstlist_ = nstlist;
1798 }
1799
1800 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters &params)
1801 {
1802     replicaExchangeParameters_ = params;
1803 }
1804
1805 void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim)
1806 {
1807     multisim_ = std::make_unique<gmx_multisim_t*>(multisim);
1808 }
1809
1810 void Mdrunner::BuilderImplementation::addCommunicationRecord(t_commrec *cr)
1811 {
1812     communicationRecord_ = cr;
1813 }
1814
1815 Mdrunner Mdrunner::BuilderImplementation::build()
1816 {
1817     auto newRunner = Mdrunner(std::move(mdModules_));
1818
1819     newRunner.mdrunOptions          = mdrunOptions_;
1820     newRunner.pforce                = forceWarningThreshold_;
1821     newRunner.startingBehavior      = startingBehavior_;
1822     newRunner.domdecOptions         = domdecOptions_;
1823
1824     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
1825     newRunner.hw_opt          = hardwareOptions_;
1826
1827     // No invariant to check. This parameter exists to optionally override other behavior.
1828     newRunner.nstlist_cmdline = nstlist_;
1829
1830     newRunner.replExParams    = replicaExchangeParameters_;
1831
1832     newRunner.filenames = filenames_;
1833
1834     GMX_ASSERT(communicationRecord_, "Bug found. It should not be possible to call build() without a valid communicationRecord_.");
1835     newRunner.cr = communicationRecord_;
1836
1837     if (multisim_)
1838     {
1839         // nullptr is a valid value for the multisim handle, so we don't check the pointed-to pointer.
1840         newRunner.ms = *multisim_;
1841     }
1842     else
1843     {
1844         GMX_THROW(gmx::APIError("MdrunnerBuilder::addMultiSim() is required before build()"));
1845     }
1846
1847     // \todo Clarify ownership and lifetime management for gmx_output_env_t
1848     // \todo Update sanity checking when output environment has clearly specified invariants.
1849     // Initialization and default values for oenv are not well specified in the current version.
1850     if (outputEnvironment_)
1851     {
1852         newRunner.oenv  = outputEnvironment_;
1853     }
1854     else
1855     {
1856         GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()"));
1857     }
1858
1859     newRunner.logFileHandle = logFileHandle_;
1860
1861     if (nbpu_opt_)
1862     {
1863         newRunner.nbpu_opt    = nbpu_opt_;
1864     }
1865     else
1866     {
1867         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
1868     }
1869
1870     if (pme_opt_ && pme_fft_opt_)
1871     {
1872         newRunner.pme_opt     = pme_opt_;
1873         newRunner.pme_fft_opt = pme_fft_opt_;
1874     }
1875     else
1876     {
1877         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
1878     }
1879
1880     if (bonded_opt_)
1881     {
1882         newRunner.bonded_opt = bonded_opt_;
1883     }
1884     else
1885     {
1886         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
1887     }
1888
1889     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
1890
1891     if (stopHandlerBuilder_)
1892     {
1893         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
1894     }
1895     else
1896     {
1897         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
1898     }
1899
1900     return newRunner;
1901 }
1902
1903 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
1904 {
1905     nbpu_opt_ = nbpu_opt;
1906 }
1907
1908 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
1909                                              const char* pme_fft_opt)
1910 {
1911     pme_opt_     = pme_opt;
1912     pme_fft_opt_ = pme_fft_opt;
1913 }
1914
1915 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
1916 {
1917     bonded_opt_ = bonded_opt;
1918 }
1919
1920 void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
1921 {
1922     update_opt_ = update_opt;
1923 }
1924
1925 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1926 {
1927     hardwareOptions_ = hardwareOptions;
1928 }
1929
1930 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
1931 {
1932     filenames_ = filenames;
1933 }
1934
1935 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1936 {
1937     outputEnvironment_ = outputEnvironment;
1938 }
1939
1940 void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle)
1941 {
1942     logFileHandle_ = logFileHandle;
1943 }
1944
1945 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
1946 {
1947     stopHandlerBuilder_ = std::move(builder);
1948 }
1949
1950 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules) :
1951     impl_ {std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules))}
1952 {
1953 }
1954
1955 MdrunnerBuilder::~MdrunnerBuilder() = default;
1956
1957 MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions    &options,
1958                                                       real                   forceWarningThreshold,
1959                                                       const StartingBehavior startingBehavior)
1960 {
1961     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
1962     return *this;
1963 }
1964
1965 MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options)
1966 {
1967     impl_->addDomdec(options);
1968     return *this;
1969 }
1970
1971 MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist)
1972 {
1973     impl_->addVerletList(nstlist);
1974     return *this;
1975 }
1976
1977 MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters &params)
1978 {
1979     impl_->addReplicaExchange(params);
1980     return *this;
1981 }
1982
1983 MdrunnerBuilder &MdrunnerBuilder::addMultiSim(gmx_multisim_t* multisim)
1984 {
1985     impl_->addMultiSim(multisim);
1986     return *this;
1987 }
1988
1989 MdrunnerBuilder &MdrunnerBuilder::addCommunicationRecord(t_commrec *cr)
1990 {
1991     impl_->addCommunicationRecord(cr);
1992     return *this;
1993 }
1994
1995 MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
1996 {
1997     impl_->addNonBonded(nbpu_opt);
1998     return *this;
1999 }
2000
2001 MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
2002                                                     const char* pme_fft_opt)
2003 {
2004     // The builder method may become more general in the future, but in this version,
2005     // parameters for PME electrostatics are both required and the only parameters
2006     // available.
2007     if (pme_opt && pme_fft_opt)
2008     {
2009         impl_->addPME(pme_opt, pme_fft_opt);
2010     }
2011     else
2012     {
2013         GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
2014     }
2015     return *this;
2016 }
2017
2018 MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
2019 {
2020     impl_->addBondedTaskAssignment(bonded_opt);
2021     return *this;
2022 }
2023
2024 MdrunnerBuilder &MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
2025 {
2026     impl_->addUpdateTaskAssignment(update_opt);
2027     return *this;
2028 }
2029
2030 Mdrunner MdrunnerBuilder::build()
2031 {
2032     return impl_->build();
2033 }
2034
2035 MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
2036 {
2037     impl_->addHardwareOptions(hardwareOptions);
2038     return *this;
2039 }
2040
2041 MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2042 {
2043     impl_->addFilenames(filenames);
2044     return *this;
2045 }
2046
2047 MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2048 {
2049     impl_->addOutputEnvironment(outputEnvironment);
2050     return *this;
2051 }
2052
2053 MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle)
2054 {
2055     impl_->addLogFile(logFileHandle);
2056     return *this;
2057 }
2058
2059 MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2060 {
2061     impl_->addStopHandlerBuilder(std::move(builder));
2062     return *this;
2063 }
2064
2065 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default;
2066
2067 MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default;
2068
2069 } // namespace gmx