Disable GPU update/constraints when neither PME nor buffer ops are offloaded.
[alexxy/gromacs.git] / src / gromacs / mdrun / runner.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the MD runner routine calling all integrators.
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include "runner.h"
47
48 #include "config.h"
49
50 #include <cassert>
51 #include <cinttypes>
52 #include <csignal>
53 #include <cstdlib>
54 #include <cstring>
55
56 #include <algorithm>
57 #include <memory>
58
59 #include "gromacs/commandline/filenm.h"
60 #include "gromacs/domdec/domdec.h"
61 #include "gromacs/domdec/domdec_struct.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/localatomsetmanager.h"
64 #include "gromacs/domdec/partition.h"
65 #include "gromacs/ewald/ewald_utils.h"
66 #include "gromacs/ewald/pme.h"
67 #include "gromacs/ewald/pme_gpu_program.h"
68 #include "gromacs/fileio/checkpoint.h"
69 #include "gromacs/fileio/gmxfio.h"
70 #include "gromacs/fileio/oenv.h"
71 #include "gromacs/fileio/tpxio.h"
72 #include "gromacs/gmxlib/network.h"
73 #include "gromacs/gmxlib/nrnb.h"
74 #include "gromacs/gpu_utils/gpu_utils.h"
75 #include "gromacs/hardware/cpuinfo.h"
76 #include "gromacs/hardware/detecthardware.h"
77 #include "gromacs/hardware/printhardware.h"
78 #include "gromacs/imd/imd.h"
79 #include "gromacs/listed_forces/disre.h"
80 #include "gromacs/listed_forces/gpubonded.h"
81 #include "gromacs/listed_forces/orires.h"
82 #include "gromacs/math/functions.h"
83 #include "gromacs/math/utilities.h"
84 #include "gromacs/math/vec.h"
85 #include "gromacs/mdlib/boxdeformation.h"
86 #include "gromacs/mdlib/broadcaststructs.h"
87 #include "gromacs/mdlib/calc_verletbuf.h"
88 #include "gromacs/mdlib/dispersioncorrection.h"
89 #include "gromacs/mdlib/enerdata_utils.h"
90 #include "gromacs/mdlib/force.h"
91 #include "gromacs/mdlib/forcerec.h"
92 #include "gromacs/mdlib/gmx_omp_nthreads.h"
93 #include "gromacs/mdlib/makeconstraints.h"
94 #include "gromacs/mdlib/md_support.h"
95 #include "gromacs/mdlib/mdatoms.h"
96 #include "gromacs/mdlib/membed.h"
97 #include "gromacs/mdlib/qmmm.h"
98 #include "gromacs/mdlib/sighandler.h"
99 #include "gromacs/mdlib/stophandler.h"
100 #include "gromacs/mdrun/mdmodules.h"
101 #include "gromacs/mdrun/simulationcontext.h"
102 #include "gromacs/mdrunutility/handlerestart.h"
103 #include "gromacs/mdrunutility/logging.h"
104 #include "gromacs/mdrunutility/multisim.h"
105 #include "gromacs/mdrunutility/printtime.h"
106 #include "gromacs/mdrunutility/threadaffinity.h"
107 #include "gromacs/mdtypes/commrec.h"
108 #include "gromacs/mdtypes/enerdata.h"
109 #include "gromacs/mdtypes/fcdata.h"
110 #include "gromacs/mdtypes/group.h"
111 #include "gromacs/mdtypes/inputrec.h"
112 #include "gromacs/mdtypes/md_enums.h"
113 #include "gromacs/mdtypes/mdrunoptions.h"
114 #include "gromacs/mdtypes/observableshistory.h"
115 #include "gromacs/mdtypes/simulation_workload.h"
116 #include "gromacs/mdtypes/state.h"
117 #include "gromacs/nbnxm/gpu_data_mgmt.h"
118 #include "gromacs/nbnxm/nbnxm.h"
119 #include "gromacs/nbnxm/pairlist_tuning.h"
120 #include "gromacs/pbcutil/pbc.h"
121 #include "gromacs/pulling/output.h"
122 #include "gromacs/pulling/pull.h"
123 #include "gromacs/pulling/pull_rotation.h"
124 #include "gromacs/restraint/manager.h"
125 #include "gromacs/restraint/restraintmdmodule.h"
126 #include "gromacs/restraint/restraintpotential.h"
127 #include "gromacs/swap/swapcoords.h"
128 #include "gromacs/taskassignment/decidegpuusage.h"
129 #include "gromacs/taskassignment/resourcedivision.h"
130 #include "gromacs/taskassignment/taskassignment.h"
131 #include "gromacs/taskassignment/usergpuids.h"
132 #include "gromacs/timing/gpu_timing.h"
133 #include "gromacs/timing/wallcycle.h"
134 #include "gromacs/timing/wallcyclereporting.h"
135 #include "gromacs/topology/mtop_util.h"
136 #include "gromacs/trajectory/trajectoryframe.h"
137 #include "gromacs/utility/basenetwork.h"
138 #include "gromacs/utility/cstringutil.h"
139 #include "gromacs/utility/exceptions.h"
140 #include "gromacs/utility/fatalerror.h"
141 #include "gromacs/utility/filestream.h"
142 #include "gromacs/utility/gmxassert.h"
143 #include "gromacs/utility/gmxmpi.h"
144 #include "gromacs/utility/keyvaluetree.h"
145 #include "gromacs/utility/logger.h"
146 #include "gromacs/utility/loggerbuilder.h"
147 #include "gromacs/utility/mdmodulenotification.h"
148 #include "gromacs/utility/physicalnodecommunicator.h"
149 #include "gromacs/utility/pleasecite.h"
150 #include "gromacs/utility/programcontext.h"
151 #include "gromacs/utility/smalloc.h"
152 #include "gromacs/utility/stringutil.h"
153
154 #include "isimulator.h"
155 #include "replicaexchange.h"
156 #include "simulatorbuilder.h"
157
158 #if GMX_FAHCORE
159 #include "corewrap.h"
160 #endif
161
162 namespace gmx
163 {
164
165 /*! \brief environment variable to enable GPU P2P communication */
166 static const bool c_enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr)
167     && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
168
169 /*! \brief environment variable to enable GPU buffer operations */
170 static const bool c_enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
171
172 /*! \brief Manage any development feature flag variables encountered
173  *
174  * The use of dev features indicated by environment variables is
175  * logged in order to ensure that runs with such featrues enabled can
176  * be identified from their log and standard output. Any cross
177  * dependencies are also checked, and if unsatisified, a fatal error
178  * issued.
179  *
180  * \param[in]  mdlog        Logger object.
181  */
182 static void manageDevelopmentFeatures(const gmx::MDLogger &mdlog)
183 {
184     const bool enableGpuBufOps       = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
185     const bool useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
186     const bool enableGpuHaloExchange = (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
187
188     if (enableGpuBufOps)
189     {
190         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
191                 "NOTE: This run uses the 'GPU buffer ops' feature, enabled by the GMX_USE_GPU_BUFFER_OPS environment variable.");
192     }
193
194     if (enableGpuHaloExchange)
195     {
196         if (!enableGpuBufOps)
197         {
198             gmx_fatal(FARGS, "Cannot enable GPU halo exchange without GPU buffer operations, set GMX_USE_GPU_BUFFER_OPS=1\n");
199         }
200     }
201
202     if (useGpuUpdateConstrain)
203     {
204         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
205                 "NOTE: This run uses the 'GPU update/constraints' feature, enabled by the GMX_UPDATE_CONSTRAIN_GPU environment variable.");
206     }
207
208 }
209
210 /*! \brief Barrier for safe simultaneous thread access to mdrunner data
211  *
212  * Used to ensure that the master thread does not modify mdrunner during copy
213  * on the spawned threads. */
214 static void threadMpiMdrunnerAccessBarrier()
215 {
216 #if GMX_THREAD_MPI
217     MPI_Barrier(MPI_COMM_WORLD);
218 #endif
219 }
220
221 Mdrunner Mdrunner::cloneOnSpawnedThread() const
222 {
223     auto newRunner = Mdrunner(std::make_unique<MDModules>());
224
225     // All runners in the same process share a restraint manager resource because it is
226     // part of the interface to the client code, which is associated only with the
227     // original thread. Handles to the same resources can be obtained by copy.
228     {
229         newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
230     }
231
232     // Copy original cr pointer before master thread can pass the thread barrier
233     newRunner.cr  = reinitialize_commrec_for_this_thread(cr);
234
235     // Copy members of master runner.
236     // \todo Replace with builder when Simulation context and/or runner phases are better defined.
237     // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
238     newRunner.hw_opt    = hw_opt;
239     newRunner.filenames = filenames;
240
241     newRunner.oenv                = oenv;
242     newRunner.mdrunOptions        = mdrunOptions;
243     newRunner.domdecOptions       = domdecOptions;
244     newRunner.nbpu_opt            = nbpu_opt;
245     newRunner.pme_opt             = pme_opt;
246     newRunner.pme_fft_opt         = pme_fft_opt;
247     newRunner.bonded_opt          = bonded_opt;
248     newRunner.update_opt          = update_opt;
249     newRunner.nstlist_cmdline     = nstlist_cmdline;
250     newRunner.replExParams        = replExParams;
251     newRunner.pforce              = pforce;
252     newRunner.ms                  = ms;
253     newRunner.startingBehavior    = startingBehavior;
254     newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
255
256     threadMpiMdrunnerAccessBarrier();
257
258     GMX_RELEASE_ASSERT(!MASTER(newRunner.cr), "cloneOnSpawnedThread should only be called on spawned threads");
259
260     return newRunner;
261 }
262
263 /*! \brief The callback used for running on spawned threads.
264  *
265  * Obtains the pointer to the master mdrunner object from the one
266  * argument permitted to the thread-launch API call, copies it to make
267  * a new runner for this thread, reinitializes necessary data, and
268  * proceeds to the simulation. */
269 static void mdrunner_start_fn(const void *arg)
270 {
271     try
272     {
273         auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg);
274         /* copy the arg list to make sure that it's thread-local. This
275            doesn't copy pointed-to items, of course; fnm, cr and fplog
276            are reset in the call below, all others should be const. */
277         gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
278         mdrunner.mdrunner();
279     }
280     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
281 }
282
283
284 /*! \brief Start thread-MPI threads.
285  *
286  * Called by mdrunner() to start a specific number of threads
287  * (including the main thread) for thread-parallel runs. This in turn
288  * calls mdrunner() for each thread. All options are the same as for
289  * mdrunner(). */
290 t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const
291 {
292
293     /* first check whether we even need to start tMPI */
294     if (numThreadsToLaunch < 2)
295     {
296         return cr;
297     }
298
299 #if GMX_THREAD_MPI
300     /* now spawn new threads that start mdrunner_start_fn(), while
301        the main thread returns. Thread affinity is handled later. */
302     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
303                      mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS)
304     {
305         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
306     }
307
308     threadMpiMdrunnerAccessBarrier();
309 #else
310     GMX_UNUSED_VALUE(mdrunner_start_fn);
311 #endif
312
313     return reinitialize_commrec_for_this_thread(cr);
314 }
315
316 }  // namespace gmx
317
318 /*! \brief Initialize variables for Verlet scheme simulation */
319 static void prepare_verlet_scheme(FILE                           *fplog,
320                                   t_commrec                      *cr,
321                                   t_inputrec                     *ir,
322                                   int                             nstlist_cmdline,
323                                   const gmx_mtop_t               *mtop,
324                                   const matrix                    box,
325                                   bool                            makeGpuPairList,
326                                   const gmx::CpuInfo             &cpuinfo)
327 {
328     /* For NVE simulations, we will retain the initial list buffer */
329     if (EI_DYNAMICS(ir->eI) &&
330         ir->verletbuf_tol > 0 &&
331         !(EI_MD(ir->eI) && ir->etc == etcNO))
332     {
333         /* Update the Verlet buffer size for the current run setup */
334
335         /* Here we assume SIMD-enabled kernels are being used. But as currently
336          * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
337          * and 4x2 gives a larger buffer than 4x4, this is ok.
338          */
339         ListSetupType      listType  = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
340         VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
341
342         const real         rlist_new =
343             calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
344
345         if (rlist_new != ir->rlist)
346         {
347             if (fplog != nullptr)
348             {
349                 fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
350                         ir->rlist, rlist_new,
351                         listSetup.cluster_size_i, listSetup.cluster_size_j);
352             }
353             ir->rlist     = rlist_new;
354         }
355     }
356
357     if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
358     {
359         gmx_fatal(FARGS, "Can not set nstlist without %s",
360                   !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
361     }
362
363     if (EI_DYNAMICS(ir->eI))
364     {
365         /* Set or try nstlist values */
366         increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
367     }
368 }
369
370 /*! \brief Override the nslist value in inputrec
371  *
372  * with value passed on the command line (if any)
373  */
374 static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
375                                     int64_t              nsteps_cmdline,
376                                     t_inputrec          *ir)
377 {
378     assert(ir);
379
380     /* override with anything else than the default -2 */
381     if (nsteps_cmdline > -2)
382     {
383         char sbuf_steps[STEPSTRSIZE];
384         char sbuf_msg[STRLEN];
385
386         ir->nsteps = nsteps_cmdline;
387         if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
388         {
389             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
390                     gmx_step_str(nsteps_cmdline, sbuf_steps),
391                     fabs(nsteps_cmdline*ir->delta_t));
392         }
393         else
394         {
395             sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
396                     gmx_step_str(nsteps_cmdline, sbuf_steps));
397         }
398
399         GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
400     }
401     else if (nsteps_cmdline < -2)
402     {
403         gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64,
404                   nsteps_cmdline);
405     }
406     /* Do nothing if nsteps_cmdline == -2 */
407 }
408
409 namespace gmx
410 {
411
412 /*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
413  *
414  * If not, and if a warning may be issued, logs a warning about
415  * falling back to CPU code. With thread-MPI, only the first
416  * call to this function should have \c issueWarning true. */
417 static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
418                                                const t_inputrec &ir,
419                                                bool              issueWarning)
420 {
421     bool        gpuIsUseful = true;
422     std::string warning;
423
424     if (ir.opts.ngener - ir.nwall > 1)
425     {
426         /* The GPU code does not support more than one energy group.
427          * If the user requested GPUs explicitly, a fatal error is given later.
428          */
429         gpuIsUseful = false;
430         warning     =
431             "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
432             "For better performance, run on the GPU without energy groups and then do "
433             "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
434     }
435
436     if (EI_TPI(ir.eI))
437     {
438         gpuIsUseful = false;
439         warning     = "TPI is not implemented for GPUs.";
440     }
441
442     if (!gpuIsUseful && issueWarning)
443     {
444         GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
445     }
446
447     return gpuIsUseful;
448 }
449
450 //! Initializes the logger for mdrun.
451 static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr)
452 {
453     gmx::LoggerBuilder builder;
454     if (fplog != nullptr)
455     {
456         builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
457     }
458     if (cr == nullptr || SIMMASTER(cr))
459     {
460         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning,
461                                 &gmx::TextOutputFile::standardError());
462     }
463     return builder.build();
464 }
465
466 //! Make a TaskTarget from an mdrun argument string.
467 static TaskTarget findTaskTarget(const char *optionString)
468 {
469     TaskTarget returnValue = TaskTarget::Auto;
470
471     if (strncmp(optionString, "auto", 3) == 0)
472     {
473         returnValue = TaskTarget::Auto;
474     }
475     else if (strncmp(optionString, "cpu", 3) == 0)
476     {
477         returnValue = TaskTarget::Cpu;
478     }
479     else if (strncmp(optionString, "gpu", 3) == 0)
480     {
481         returnValue = TaskTarget::Gpu;
482     }
483     else
484     {
485         GMX_ASSERT(false, "Option string should have been checked for sanity already");
486     }
487
488     return returnValue;
489 }
490
491 //! Finish run, aggregate data to print performance info.
492 static void finish_run(FILE *fplog,
493                        const gmx::MDLogger &mdlog,
494                        const t_commrec *cr,
495                        const t_inputrec *inputrec,
496                        t_nrnb nrnb[], gmx_wallcycle_t wcycle,
497                        gmx_walltime_accounting_t walltime_accounting,
498                        nonbonded_verlet_t *nbv,
499                        const gmx_pme_t *pme,
500                        gmx_bool bWriteStat)
501 {
502     double  delta_t  = 0;
503     double  nbfs     = 0, mflop = 0;
504     double  elapsed_time,
505             elapsed_time_over_all_ranks,
506             elapsed_time_over_all_threads,
507             elapsed_time_over_all_threads_over_all_ranks;
508     /* Control whether it is valid to print a report. Only the
509        simulation master may print, but it should not do so if the run
510        terminated e.g. before a scheduled reset step. This is
511        complicated by the fact that PME ranks are unaware of the
512        reason why they were sent a pmerecvqxFINISH. To avoid
513        communication deadlocks, we always do the communication for the
514        report, even if we've decided not to write the report, because
515        how long it takes to finish the run is not important when we've
516        decided not to report on the simulation performance.
517
518        Further, we only report performance for dynamical integrators,
519        because those are the only ones for which we plan to
520        consider doing any optimizations. */
521     bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
522
523     if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
524     {
525         GMX_LOG(mdlog.warning).asParagraph().appendText("Simulation ended prematurely, no performance report will be written.");
526         printReport = false;
527     }
528
529     t_nrnb                  *nrnb_tot;
530     std::unique_ptr<t_nrnb>  nrnbTotalStorage;
531     if (cr->nnodes > 1)
532     {
533         nrnbTotalStorage = std::make_unique<t_nrnb>();
534         nrnb_tot         = nrnbTotalStorage.get();
535 #if GMX_MPI
536         MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
537                       cr->mpi_comm_mysim);
538 #endif
539     }
540     else
541     {
542         nrnb_tot = nrnb;
543     }
544
545     elapsed_time                  = walltime_accounting_get_time_since_reset(walltime_accounting);
546     elapsed_time_over_all_threads = walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
547     if (cr->nnodes > 1)
548     {
549 #if GMX_MPI
550         /* reduce elapsed_time over all MPI ranks in the current simulation */
551         MPI_Allreduce(&elapsed_time,
552                       &elapsed_time_over_all_ranks,
553                       1, MPI_DOUBLE, MPI_SUM,
554                       cr->mpi_comm_mysim);
555         elapsed_time_over_all_ranks /= cr->nnodes;
556         /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
557          * current simulation. */
558         MPI_Allreduce(&elapsed_time_over_all_threads,
559                       &elapsed_time_over_all_threads_over_all_ranks,
560                       1, MPI_DOUBLE, MPI_SUM,
561                       cr->mpi_comm_mysim);
562 #endif
563     }
564     else
565     {
566         elapsed_time_over_all_ranks                  = elapsed_time;
567         elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
568     }
569
570     if (printReport)
571     {
572         print_flop(fplog, nrnb_tot, &nbfs, &mflop);
573     }
574
575     if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
576     {
577         print_dd_statistics(cr, inputrec, fplog);
578     }
579
580     /* TODO Move the responsibility for any scaling by thread counts
581      * to the code that handled the thread region, so that there's a
582      * mechanism to keep cycle counting working during the transition
583      * to task parallelism. */
584     int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
585     int nthreads_pme = gmx_omp_nthreads_get(emntPME);
586     wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
587     auto cycle_sum(wallcycle_sum(cr, wcycle));
588
589     if (printReport)
590     {
591         auto                    nbnxn_gpu_timings = (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
592         gmx_wallclock_gpu_pme_t pme_gpu_timings   = {};
593
594         if (pme_gpu_task_enabled(pme))
595         {
596             pme_gpu_get_timings(pme, &pme_gpu_timings);
597         }
598         wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
599                         elapsed_time_over_all_ranks,
600                         wcycle, cycle_sum,
601                         nbnxn_gpu_timings,
602                         &pme_gpu_timings);
603
604         if (EI_DYNAMICS(inputrec->eI))
605         {
606             delta_t = inputrec->delta_t;
607         }
608
609         if (fplog)
610         {
611             print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
612                        elapsed_time_over_all_ranks,
613                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
614                        delta_t, nbfs, mflop);
615         }
616         if (bWriteStat)
617         {
618             print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
619                        elapsed_time_over_all_ranks,
620                        walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
621                        delta_t, nbfs, mflop);
622         }
623     }
624 }
625
626 int Mdrunner::mdrunner()
627 {
628     matrix                    box;
629     t_forcerec               *fr               = nullptr;
630     t_fcdata                 *fcd              = nullptr;
631     real                      ewaldcoeff_q     = 0;
632     real                      ewaldcoeff_lj    = 0;
633     int                       nChargePerturbed = -1, nTypePerturbed = 0;
634     gmx_wallcycle_t           wcycle;
635     gmx_walltime_accounting_t walltime_accounting = nullptr;
636     gmx_membed_t *            membed              = nullptr;
637     gmx_hw_info_t            *hwinfo              = nullptr;
638
639     /* CAUTION: threads may be started later on in this function, so
640        cr doesn't reflect the final parallel state right now */
641     gmx_mtop_t                      mtop;
642
643     bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data());
644     bool doRerun  = mdrunOptions.rerun;
645
646     // Handle task-assignment related user options.
647     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
648                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
649
650     std::vector<int> userGpuTaskAssignment;
651     try
652     {
653         userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
654     }
655     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
656     auto       nonbondedTarget = findTaskTarget(nbpu_opt);
657     auto       pmeTarget       = findTaskTarget(pme_opt);
658     auto       pmeFftTarget    = findTaskTarget(pme_fft_opt);
659     auto       bondedTarget    = findTaskTarget(bonded_opt);
660     auto       updateTarget    = findTaskTarget(update_opt);
661     PmeRunMode pmeRunMode      = PmeRunMode::None;
662
663     // Here we assume that SIMMASTER(cr) does not change even after the
664     // threads are started.
665
666     FILE *fplog = nullptr;
667     // If we are appending, we don't write log output because we need
668     // to check that the old log file matches what the checkpoint file
669     // expects. Otherwise, we should start to write log output now if
670     // there is a file ready for it.
671     if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
672     {
673         fplog = gmx_fio_getfp(logFileHandle);
674     }
675     gmx::LoggerOwner logOwner(buildLogger(fplog, cr));
676     gmx::MDLogger    mdlog(logOwner.logger());
677
678     // report any development features that may be enabled by environment variables
679     manageDevelopmentFeatures(mdlog);
680
681     // With thread-MPI, the communicator changes after threads are
682     // launched, so this is rebuilt for the master rank at that
683     // time. The non-master ranks are fine to keep the one made here.
684     PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
685     hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
686
687     gmx_print_detected_hardware(fplog, isMasterSimMasterRank(ms, MASTER(cr)), mdlog, hwinfo);
688
689     std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
690
691     // Print citation requests after all software/hardware printing
692     pleaseCiteGromacs(fplog);
693
694     // TODO Replace this by unique_ptr once t_inputrec is C++
695     t_inputrec               inputrecInstance;
696     t_inputrec              *inputrec = nullptr;
697     std::unique_ptr<t_state> globalState;
698
699     if (SIMMASTER(cr))
700     {
701         /* Only the master rank has the global state */
702         globalState = std::make_unique<t_state>();
703
704         /* Read (nearly) all data required for the simulation */
705         read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()),
706                        &inputrecInstance, globalState.get(), &mtop);
707         inputrec = &inputrecInstance;
708     }
709
710     /* Check and update the hardware options for internal consistency */
711     checkAndUpdateHardwareOptions(mdlog, &hw_opt, SIMMASTER(cr), domdecOptions.numPmeRanks, inputrec);
712
713     if (GMX_THREAD_MPI && SIMMASTER(cr))
714     {
715         bool useGpuForNonbonded = false;
716         bool useGpuForPme       = false;
717         try
718         {
719             GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
720
721             // If the user specified the number of ranks, then we must
722             // respect that, but in default mode, we need to allow for
723             // the number of GPUs to choose the number of ranks.
724             auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
725             useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
726                     (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
727                     canUseGpuForNonbonded,
728                     gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
729                     hw_opt.nthreads_tmpi);
730             useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
731                     (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
732                     *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
733
734         }
735         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
736
737         /* Determine how many thread-MPI ranks to start.
738          *
739          * TODO Over-writing the user-supplied value here does
740          * prevent any possible subsequent checks from working
741          * correctly. */
742         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
743                                                 &hw_opt,
744                                                 gpuIdsToUse,
745                                                 useGpuForNonbonded,
746                                                 useGpuForPme,
747                                                 inputrec, &mtop,
748                                                 mdlog,
749                                                 doMembed);
750
751         // Now start the threads for thread MPI.
752         cr = spawnThreads(hw_opt.nthreads_tmpi);
753         /* The main thread continues here with a new cr. We don't deallocate
754            the old cr because other threads may still be reading it. */
755         // TODO Both master and spawned threads call dup_tfn and
756         // reinitialize_commrec_for_this_thread. Find a way to express
757         // this better.
758         physicalNodeComm = PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
759     }
760     // END OF CAUTION: cr and physicalNodeComm are now reliable
761
762     if (PAR(cr))
763     {
764         /* now broadcast everything to the non-master nodes/threads: */
765         if (!SIMMASTER(cr))
766         {
767             inputrec = &inputrecInstance;
768         }
769         init_parallel(cr, inputrec, &mtop);
770     }
771     GMX_RELEASE_ASSERT(inputrec != nullptr, "All range should have a valid inputrec now");
772
773     // Now each rank knows the inputrec that SIMMASTER read and used,
774     // and (if applicable) cr->nnodes has been assigned the number of
775     // thread-MPI ranks that have been chosen. The ranks can now all
776     // run the task-deciding functions and will agree on the result
777     // without needing to communicate.
778     //
779     // TODO Should we do the communication in debug mode to support
780     // having an assertion?
781     //
782     // Note that these variables describe only their own node.
783     //
784     // Note that when bonded interactions run on a GPU they always run
785     // alongside a nonbonded task, so do not influence task assignment
786     // even though they affect the force calculation workload.
787     bool useGpuForNonbonded     = false;
788     bool useGpuForPme           = false;
789     bool useGpuForBonded        = false;
790     bool useGpuForUpdate        = false;
791     bool gpusWereDetected       = hwinfo->ngpu_compatible_tot > 0;
792     try
793     {
794         // It's possible that there are different numbers of GPUs on
795         // different nodes, which is the user's responsibilty to
796         // handle. If unsuitable, we will notice that during task
797         // assignment.
798         auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
799         useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment,
800                                                                 emulateGpuNonbonded,
801                                                                 canUseGpuForNonbonded,
802                                                                 gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI),
803                                                                 gpusWereDetected);
804         useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment,
805                                                     *hwinfo, *inputrec, mtop,
806                                                     cr->nnodes, domdecOptions.numPmeRanks,
807                                                     gpusWereDetected);
808         auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
809         useGpuForBonded =
810             decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
811                                             bondedTarget, canUseGpuForBonded,
812                                             EVDW_PME(inputrec->vdwtype),
813                                             EEL_PME_EWALD(inputrec->coulombtype),
814                                             domdecOptions.numPmeRanks, gpusWereDetected);
815
816         pmeRunMode   = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
817         if (pmeRunMode == PmeRunMode::GPU)
818         {
819             if (pmeFftTarget == TaskTarget::Cpu)
820             {
821                 pmeRunMode = PmeRunMode::Mixed;
822             }
823         }
824         else if (pmeFftTarget == TaskTarget::Gpu)
825         {
826             gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft.");
827         }
828     }
829     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
830
831     // Build restraints.
832     // TODO: hide restraint implementation details from Mdrunner.
833     // There is nothing unique about restraints at this point as far as the
834     // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
835     // factory functions from the SimulationContext on which to call mdModules_->add().
836     // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
837     for (auto && restraint : restraintManager_->getRestraints())
838     {
839         auto module = RestraintMDModule::create(restraint,
840                                                 restraint->sites());
841         mdModules_->add(std::move(module));
842     }
843
844     // TODO: Error handling
845     mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
846     const auto &mdModulesNotifier = mdModules_->notifier().notifier_;
847
848     if (inputrec->internalParameters != nullptr)
849     {
850         mdModulesNotifier.notify(*inputrec->internalParameters);
851     }
852
853     if (fplog != nullptr)
854     {
855         pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
856         fprintf(fplog, "\n");
857     }
858
859     if (SIMMASTER(cr))
860     {
861         /* In rerun, set velocities to zero if present */
862         if (doRerun && ((globalState->flags & (1 << estV)) != 0))
863         {
864             // rerun does not use velocities
865             GMX_LOG(mdlog.info).asParagraph().appendText(
866                     "Rerun trajectory contains velocities. Rerun does only evaluate "
867                     "potential energy and forces. The velocities will be ignored.");
868             for (int i = 0; i < globalState->natoms; i++)
869             {
870                 clear_rvec(globalState->v[i]);
871             }
872             globalState->flags &= ~(1 << estV);
873         }
874
875         /* now make sure the state is initialized and propagated */
876         set_state_entries(globalState.get(), inputrec);
877     }
878
879     /* NM and TPI parallelize over force/energy calculations, not atoms,
880      * so we need to initialize and broadcast the global state.
881      */
882     if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
883     {
884         if (!MASTER(cr))
885         {
886             globalState = std::make_unique<t_state>();
887         }
888         broadcastStateWithoutDynamics(cr, globalState.get());
889     }
890
891     /* A parallel command line option consistency check that we can
892        only do after any threads have started. */
893     if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 ||
894                      domdecOptions.numCells[YY] > 1 ||
895                      domdecOptions.numCells[ZZ] > 1 ||
896                      domdecOptions.numPmeRanks > 0))
897     {
898         gmx_fatal(FARGS,
899                   "The -dd or -npme option request a parallel simulation, "
900 #if !GMX_MPI
901                   "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv));
902 #elif GMX_THREAD_MPI
903                   "but the number of MPI-threads (option -ntmpi) is not set or is 1");
904 #else
905                   "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv));
906 #endif
907     }
908
909     if (doRerun &&
910         (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
911     {
912         gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
913     }
914
915     if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
916     {
917         if (domdecOptions.numPmeRanks > 0)
918         {
919             gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
920                                  "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
921         }
922
923         domdecOptions.numPmeRanks = 0;
924     }
925
926     if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
927     {
928         /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
929          * improve performance with many threads per GPU, since our OpenMP
930          * scaling is bad, but it's difficult to automate the setup.
931          */
932         domdecOptions.numPmeRanks = 0;
933     }
934     if (useGpuForPme)
935     {
936         if (domdecOptions.numPmeRanks < 0)
937         {
938             domdecOptions.numPmeRanks = 0;
939             // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
940         }
941         else
942         {
943             GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported");
944         }
945     }
946
947 #if GMX_FAHCORE
948     if (MASTER(cr))
949     {
950         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
951     }
952 #endif
953
954     /* NMR restraints must be initialized before load_checkpoint,
955      * since with time averaging the history is added to t_state.
956      * For proper consistency check we therefore need to extend
957      * t_state here.
958      * So the PME-only nodes (if present) will also initialize
959      * the distance restraints.
960      */
961     snew(fcd, 1);
962
963     /* This needs to be called before read_checkpoint to extend the state */
964     init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
965
966     init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
967
968     auto                 deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
969
970     ObservablesHistory   observablesHistory = {};
971
972     if (startingBehavior != StartingBehavior::NewSimulation)
973     {
974         /* Check if checkpoint file exists before doing continuation.
975          * This way we can use identical input options for the first and subsequent runs...
976          */
977         if (mdrunOptions.numStepsCommandline > -2)
978         {
979             /* Temporarily set the number of steps to unmlimited to avoid
980              * triggering the nsteps check in load_checkpoint().
981              * This hack will go away soon when the -nsteps option is removed.
982              */
983             inputrec->nsteps = -1;
984         }
985
986         load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
987                         logFileHandle,
988                         cr, domdecOptions.numCells,
989                         inputrec, globalState.get(),
990                         &observablesHistory,
991                         mdrunOptions.reproducible, mdModules_->notifier());
992
993         if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
994         {
995             // Now we can start normal logging to the truncated log file.
996             fplog    = gmx_fio_getfp(logFileHandle);
997             prepareLogAppending(fplog);
998             logOwner = buildLogger(fplog, cr);
999             mdlog    = logOwner.logger();
1000         }
1001     }
1002
1003     if (mdrunOptions.numStepsCommandline > -2)
1004     {
1005         GMX_LOG(mdlog.info).asParagraph().
1006             appendText("The -nsteps functionality is deprecated, and may be removed in a future version. "
1007                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field.");
1008     }
1009     /* override nsteps with value set on the commamdline */
1010     override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
1011
1012     if (SIMMASTER(cr))
1013     {
1014         copy_mat(globalState->box, box);
1015     }
1016
1017     if (PAR(cr))
1018     {
1019         gmx_bcast(sizeof(box), box, cr);
1020     }
1021
1022     if (inputrec->cutoff_scheme != ecutsVERLET)
1023     {
1024         gmx_fatal(FARGS, "This group-scheme .tpr file can no longer be run by mdrun. Please update to the Verlet scheme, or use an earlier version of GROMACS if necessary.");
1025     }
1026     /* Update rlist and nstlist. */
1027     prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
1028                           useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
1029
1030     const bool          prefer1DAnd1PulseDD = (c_enableGpuHaloExchange && useGpuForNonbonded);
1031     LocalAtomSetManager atomSets;
1032     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
1033                      inputrec->eI == eiNM))
1034     {
1035         cr->dd = init_domain_decomposition(mdlog, cr, domdecOptions, mdrunOptions,
1036                                            prefer1DAnd1PulseDD,
1037                                            &mtop, inputrec,
1038                                            box, positionsFromStatePointer(globalState.get()),
1039                                            &atomSets);
1040         // Note that local state still does not exist yet.
1041     }
1042     else
1043     {
1044         /* PME, if used, is done on all nodes with 1D decomposition */
1045         cr->npmenodes = 0;
1046         cr->duty      = (DUTY_PP | DUTY_PME);
1047
1048         if (inputrec->ePBC == epbcSCREW)
1049         {
1050             gmx_fatal(FARGS,
1051                       "pbc=screw is only implemented with domain decomposition");
1052         }
1053     }
1054
1055     if (PAR(cr))
1056     {
1057         /* After possible communicator splitting in make_dd_communicators.
1058          * we can set up the intra/inter node communication.
1059          */
1060         gmx_setup_nodecomm(fplog, cr);
1061     }
1062
1063 #if GMX_MPI
1064     if (isMultiSim(ms))
1065     {
1066         GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1067                 "This is simulation %d out of %d running as a composite GROMACS\n"
1068                 "multi-simulation job. Setup for this simulation:\n",
1069                 ms->sim, ms->nsim);
1070     }
1071     GMX_LOG(mdlog.warning).appendTextFormatted(
1072             "Using %d MPI %s\n",
1073             cr->nnodes,
1074 #if GMX_THREAD_MPI
1075             cr->nnodes == 1 ? "thread" : "threads"
1076 #else
1077             cr->nnodes == 1 ? "process" : "processes"
1078 #endif
1079             );
1080     fflush(stderr);
1081 #endif
1082
1083     // If mdrun -pin auto honors any affinity setting that already
1084     // exists. If so, it is nice to provide feedback about whether
1085     // that existing affinity setting was from OpenMP or something
1086     // else, so we run this code both before and after we initialize
1087     // the OpenMP support.
1088     gmx_check_thread_affinity_set(mdlog,
1089                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
1090     /* Check and update the number of OpenMP threads requested */
1091     checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
1092                                             pmeRunMode, mtop, *inputrec);
1093
1094     gmx_omp_nthreads_init(mdlog, cr,
1095                           hwinfo->nthreads_hw_avail,
1096                           physicalNodeComm.size_,
1097                           hw_opt.nthreads_omp,
1098                           hw_opt.nthreads_omp_pme,
1099                           !thisRankHasDuty(cr, DUTY_PP));
1100
1101     // Enable FP exception detection, but not in
1102     // Release mode and not for compilers with known buggy FP
1103     // exception support (clang with any optimization) or suspected
1104     // buggy FP exception support (gcc 7.* with optimization).
1105 #if !defined NDEBUG && \
1106     !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
1107     && defined __OPTIMIZE__)
1108     const bool bEnableFPE = true;
1109 #else
1110     const bool bEnableFPE = false;
1111 #endif
1112     //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
1113     if (bEnableFPE)
1114     {
1115         gmx_feenableexcept();
1116     }
1117
1118     // TODO This could move before init_domain_decomposition() as part
1119     // of refactoring that separates the responsibility for duty
1120     // assignment from setup for communication between tasks, and
1121     // setup for tasks handled with a domain (ie including short-ranged
1122     // tasks, bonded tasks, etc.).
1123     //
1124     // Note that in general useGpuForNonbonded, etc. can have a value
1125     // that is inconsistent with the presence of actual GPUs on any
1126     // rank, and that is not known to be a problem until the
1127     // duty of the ranks on a node become known.
1128
1129     // Produce the task assignment for this rank.
1130     GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
1131     GpuTaskAssignments        gpuTaskAssignments =
1132         gpuTaskAssignmentsBuilder.build(gpuIdsToUse,
1133                                         userGpuTaskAssignment,
1134                                         *hwinfo,
1135                                         cr,
1136                                         ms,
1137                                         physicalNodeComm,
1138                                         nonbondedTarget,
1139                                         pmeTarget,
1140                                         bondedTarget,
1141                                         updateTarget,
1142                                         useGpuForNonbonded,
1143                                         useGpuForPme,
1144                                         thisRankHasDuty(cr, DUTY_PP),
1145                                         // TODO cr->duty & DUTY_PME should imply that a PME
1146                                         // algorithm is active, but currently does not.
1147                                         EEL_PME(inputrec->coulombtype) &&
1148                                         thisRankHasDuty(cr, DUTY_PME));
1149
1150     const bool printHostName = (cr->nnodes > 1);
1151     gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode);
1152
1153     // If the user chose a task assignment, give them some hints
1154     // where appropriate.
1155     if (!userGpuTaskAssignment.empty())
1156     {
1157         gpuTaskAssignments.logPerformanceHints(mdlog,
1158                                                ssize(gpuIdsToUse));
1159     }
1160
1161     /* Now that we know the setup is consistent, check for efficiency */
1162     check_resource_division_efficiency(hwinfo,
1163                                        gpuTaskAssignments.thisRankHasAnyGpuTask(),
1164                                        mdrunOptions.ntompOptionIsSet,
1165                                        cr,
1166                                        mdlog);
1167
1168     // Get the device handles for the modules, nullptr when no task is assigned.
1169     gmx_device_info_t *nonbondedDeviceInfo   = gpuTaskAssignments.initNonbondedDevice(cr);
1170     gmx_device_info_t *pmeDeviceInfo         = gpuTaskAssignments.initPmeDevice();
1171     const bool         thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
1172
1173     // TODO should live in ewald module once its testing is improved
1174     //
1175     // Later, this program could contain kernels that might be later
1176     // re-used as auto-tuning progresses, or subsequent simulations
1177     // are invoked.
1178     PmeGpuProgramStorage pmeGpuProgram;
1179     if (thisRankHasPmeGpuTask)
1180     {
1181         pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
1182     }
1183
1184     /* getting number of PP/PME threads on this MPI / tMPI rank.
1185        PME: env variable should be read only on one node to make sure it is
1186        identical everywhere;
1187      */
1188     const int numThreadsOnThisRank =
1189         thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded) : gmx_omp_nthreads_get(emntPME);
1190     checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid,
1191                                   *hwinfo->hardwareTopology,
1192                                   physicalNodeComm, mdlog);
1193
1194     if (hw_opt.threadAffinity != ThreadAffinity::Off)
1195     {
1196         /* Before setting affinity, check whether the affinity has changed
1197          * - which indicates that probably the OpenMP library has changed it
1198          * since we first checked).
1199          */
1200         gmx_check_thread_affinity_set(mdlog,
1201                                       &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
1202
1203         int numThreadsOnThisNode, intraNodeThreadOffset;
1204         analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
1205                                  &intraNodeThreadOffset);
1206
1207         /* Set the CPU affinity */
1208         gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology,
1209                                 numThreadsOnThisRank, numThreadsOnThisNode,
1210                                 intraNodeThreadOffset, nullptr);
1211     }
1212
1213     if (mdrunOptions.timingOptions.resetStep > -1)
1214     {
1215         GMX_LOG(mdlog.info).asParagraph().
1216             appendText("The -resetstep functionality is deprecated, and may be removed in a future version.");
1217     }
1218     wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
1219
1220     if (PAR(cr))
1221     {
1222         /* Master synchronizes its value of reset_counters with all nodes
1223          * including PME only nodes */
1224         int64_t reset_counters = wcycle_get_reset_counters(wcycle);
1225         gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
1226         wcycle_set_reset_counters(wcycle, reset_counters);
1227     }
1228
1229     // Membrane embedding must be initialized before we call init_forcerec()
1230     if (doMembed)
1231     {
1232         if (MASTER(cr))
1233         {
1234             fprintf(stderr, "Initializing membed");
1235         }
1236         /* Note that membed cannot work in parallel because mtop is
1237          * changed here. Fix this if we ever want to make it run with
1238          * multiple ranks. */
1239         membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr,
1240                              &mdrunOptions
1241                                  .checkpointOptions.period);
1242     }
1243
1244     std::unique_ptr<MDAtoms>     mdAtoms;
1245     std::unique_ptr<gmx_vsite_t> vsite;
1246
1247     t_nrnb nrnb;
1248     if (thisRankHasDuty(cr, DUTY_PP))
1249     {
1250         mdModulesNotifier.notify(*cr);
1251         mdModulesNotifier.notify(&atomSets);
1252         mdModulesNotifier.notify(PeriodicBoundaryConditionType {inputrec->ePBC});
1253         /* Initiate forcerecord */
1254         fr                 = new t_forcerec;
1255         fr->forceProviders = mdModules_->initForceProviders();
1256         init_forcerec(fplog, mdlog, fr, fcd,
1257                       inputrec, &mtop, cr, box,
1258                       opt2fn("-table", filenames.size(), filenames.data()),
1259                       opt2fn("-tablep", filenames.size(), filenames.data()),
1260                       opt2fns("-tableb", filenames.size(), filenames.data()),
1261                       *hwinfo, nonbondedDeviceInfo,
1262                       useGpuForBonded,
1263                       pforce,
1264                       wcycle);
1265
1266         // TODO Move this to happen during domain decomposition setup,
1267         // once stream and event handling works well with that.
1268         // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
1269         if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
1270         {
1271             GMX_RELEASE_ASSERT(c_enableGpuBufOps, "Must use GMX_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
1272             void *streamLocal                   = Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1273             void *streamNonLocal                =
1274                 Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, Nbnxm::InteractionLocality::NonLocal);
1275             void *coordinatesOnDeviceEvent = fr->nbv->get_x_on_device_event();
1276             GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
1277                     "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the GMX_GPU_DD_COMMS environment variable.");
1278             cr->dd->gpuHaloExchange = std::make_unique<GpuHaloExchange>(cr->dd, cr->mpi_comm_mysim, streamLocal,
1279                                                                         streamNonLocal, coordinatesOnDeviceEvent);
1280         }
1281
1282         /* Initialize the mdAtoms structure.
1283          * mdAtoms is not filled with atom data,
1284          * as this can not be done now with domain decomposition.
1285          */
1286         mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
1287         if (globalState && thisRankHasPmeGpuTask)
1288         {
1289             // The pinning of coordinates in the global state object works, because we only use
1290             // PME on GPU without DD or on a separate PME rank, and because the local state pointer
1291             // points to the global state object without DD.
1292             // FIXME: MD and EM separately set up the local state - this should happen in the same function,
1293             // which should also perform the pinning.
1294             changePinningPolicy(&globalState->x, pme_get_pinning_policy());
1295         }
1296
1297         /* Initialize the virtual site communication */
1298         vsite = initVsite(mtop, cr);
1299
1300         calc_shifts(box, fr->shift_vec);
1301
1302         /* With periodic molecules the charge groups should be whole at start up
1303          * and the virtual sites should not be far from their proper positions.
1304          */
1305         if (!inputrec->bContinuation && MASTER(cr) &&
1306             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
1307         {
1308             /* Make molecules whole at start of run */
1309             if (fr->ePBC != epbcNONE)
1310             {
1311                 do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
1312             }
1313             if (vsite)
1314             {
1315                 /* Correct initial vsite positions are required
1316                  * for the initial distribution in the domain decomposition
1317                  * and for the initial shell prediction.
1318                  */
1319                 constructVsitesGlobal(mtop, globalState->x);
1320             }
1321         }
1322
1323         if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
1324         {
1325             ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
1326             ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
1327         }
1328     }
1329     else
1330     {
1331         /* This is a PME only node */
1332
1333         GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized");
1334
1335         ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
1336         ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
1337     }
1338
1339     gmx_pme_t *sepPmeData = nullptr;
1340     // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
1341     GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
1342     gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
1343
1344     /* Initiate PME if necessary,
1345      * either on all nodes or on dedicated PME nodes only. */
1346     if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
1347     {
1348         if (mdAtoms && mdAtoms->mdatoms())
1349         {
1350             nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
1351             if (EVDW_PME(inputrec->vdwtype))
1352             {
1353                 nTypePerturbed   = mdAtoms->mdatoms()->nTypePerturbed;
1354             }
1355         }
1356         if (cr->npmenodes > 0)
1357         {
1358             /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
1359             gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
1360             gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
1361         }
1362
1363         if (thisRankHasDuty(cr, DUTY_PME))
1364         {
1365             try
1366             {
1367                 pmedata = gmx_pme_init(cr,
1368                                        getNumPmeDomains(cr->dd),
1369                                        inputrec,
1370                                        nChargePerturbed != 0, nTypePerturbed != 0,
1371                                        mdrunOptions.reproducible,
1372                                        ewaldcoeff_q, ewaldcoeff_lj,
1373                                        gmx_omp_nthreads_get(emntPME),
1374                                        pmeRunMode, nullptr,
1375                                        pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
1376             }
1377             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1378         }
1379     }
1380
1381
1382     if (EI_DYNAMICS(inputrec->eI))
1383     {
1384         /* Turn on signal handling on all nodes */
1385         /*
1386          * (A user signal from the PME nodes (if any)
1387          * is communicated to the PP nodes.
1388          */
1389         signal_handler_install();
1390     }
1391
1392     pull_t *pull_work = nullptr;
1393     if (thisRankHasDuty(cr, DUTY_PP))
1394     {
1395         /* Assumes uniform use of the number of OpenMP threads */
1396         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
1397
1398         if (inputrec->bPull)
1399         {
1400             /* Initialize pull code */
1401             pull_work =
1402                 init_pull(fplog, inputrec->pull, inputrec,
1403                           &mtop, cr, &atomSets, inputrec->fepvals->init_lambda);
1404             if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
1405             {
1406                 initPullHistory(pull_work, &observablesHistory);
1407             }
1408             if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
1409             {
1410                 init_pull_output_files(pull_work,
1411                                        filenames.size(), filenames.data(), oenv,
1412                                        startingBehavior);
1413             }
1414         }
1415
1416         std::unique_ptr<EnforcedRotation> enforcedRotation;
1417         if (inputrec->bRot)
1418         {
1419             /* Initialize enforced rotation code */
1420             enforcedRotation = init_rot(fplog,
1421                                         inputrec,
1422                                         filenames.size(),
1423                                         filenames.data(),
1424                                         cr,
1425                                         &atomSets,
1426                                         globalState.get(),
1427                                         &mtop,
1428                                         oenv,
1429                                         mdrunOptions,
1430                                         startingBehavior);
1431         }
1432
1433         t_swap *swap = nullptr;
1434         if (inputrec->eSwapCoords != eswapNO)
1435         {
1436             /* Initialize ion swapping code */
1437             swap = init_swapcoords(fplog, inputrec,
1438                                    opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
1439                                    &mtop, globalState.get(), &observablesHistory,
1440                                    cr, &atomSets, oenv, mdrunOptions,
1441                                    startingBehavior);
1442         }
1443
1444         /* Let makeConstraints know whether we have essential dynamics constraints.
1445          * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint
1446          */
1447         bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr
1448                                     || observablesHistory.edsamHistory);
1449         auto constr              = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics,
1450                                                    fplog, *mdAtoms->mdatoms(),
1451                                                    cr, ms, &nrnb, wcycle, fr->bMolPBC);
1452
1453         /* Energy terms and groups */
1454         gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(), inputrec->fepvals->n_lambda);
1455
1456         /* Kinetic energy data */
1457         gmx_ekindata_t ekind;
1458         init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
1459
1460         /* Set up interactive MD (IMD) */
1461         auto imdSession = makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
1462                                          MASTER(cr) ? globalState->x.rvec_array() : nullptr,
1463                                          filenames.size(), filenames.data(), oenv, mdrunOptions.imdOptions,
1464                                          startingBehavior);
1465
1466         if (DOMAINDECOMP(cr))
1467         {
1468             GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
1469             /* This call is not included in init_domain_decomposition mainly
1470              * because fr->cginfo_mb is set later.
1471              */
1472             dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
1473                             domdecOptions.checkBondedInteractions,
1474                             fr->cginfo_mb);
1475         }
1476
1477         if (updateTarget == TaskTarget::Gpu)
1478         {
1479             if (SIMMASTER(cr))
1480             {
1481                 gmx_fatal(FARGS, "It is currently not possible to redirect the calculation "
1482                           "of update and constraints to the GPU!");
1483             }
1484         }
1485
1486         // Before we start the actual simulator, try if we can run the update task on the GPU.
1487         useGpuForUpdate = decideWhetherToUseGpuForUpdate(DOMAINDECOMP(cr),
1488                                                          useGpuForPme,
1489                                                          useGpuForNonbonded,
1490                                                          c_enableGpuBufOps,
1491                                                          updateTarget,
1492                                                          gpusWereDetected,
1493                                                          *inputrec,
1494                                                          *mdAtoms,
1495                                                          doEssentialDynamics,
1496                                                          fcd->orires.nr != 0,
1497                                                          fcd->disres.nsystems != 0);
1498
1499         // TODO This is not the right place to manage the lifetime of
1500         // this data structure, but currently it's the easiest way to
1501         // make it work.
1502         MdrunScheduleWorkload runScheduleWork;
1503
1504         GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
1505         SimulatorBuilder simulatorBuilder;
1506
1507         // build and run simulator object based on user-input
1508         const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
1509                     false,
1510                     inputrec, doRerun, vsite.get(), ms, replExParams,
1511                     fcd, static_cast<int>(filenames.size()), filenames.data(),
1512                     &observablesHistory, membed);
1513         auto simulator = simulatorBuilder.build(
1514                     inputIsCompatibleWithModularSimulator,
1515                     fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(),
1516                     oenv,
1517                     mdrunOptions,
1518                     startingBehavior,
1519                     vsite.get(), constr.get(),
1520                     enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
1521                     deform.get(),
1522                     mdModules_->outputProvider(),
1523                     mdModules_->notifier(),
1524                     inputrec, imdSession.get(), pull_work, swap, &mtop,
1525                     fcd,
1526                     globalState.get(),
1527                     &observablesHistory,
1528                     mdAtoms.get(), &nrnb, wcycle, fr,
1529                     &enerd,
1530                     &ekind,
1531                     &runScheduleWork,
1532                     replExParams,
1533                     membed,
1534                     walltime_accounting,
1535                     std::move(stopHandlerBuilder_),
1536                     doRerun,
1537                     useGpuForUpdate);
1538         simulator->run();
1539
1540         if (inputrec->bPull)
1541         {
1542             finish_pull(pull_work);
1543         }
1544         finish_swapcoords(swap);
1545     }
1546     else
1547     {
1548         GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
1549         /* do PME only */
1550         walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
1551         gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
1552     }
1553
1554     wallcycle_stop(wcycle, ewcRUN);
1555
1556     /* Finish up, write some stuff
1557      * if rerunMD, don't write last frame again
1558      */
1559     finish_run(fplog, mdlog, cr,
1560                inputrec, &nrnb, wcycle, walltime_accounting,
1561                fr ? fr->nbv.get() : nullptr,
1562                pmedata,
1563                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
1564
1565     // clean up cycle counter
1566     wallcycle_destroy(wcycle);
1567
1568 // Free PME data
1569     if (pmedata)
1570     {
1571         gmx_pme_destroy(pmedata);
1572         pmedata = nullptr;
1573     }
1574
1575     // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
1576     // before we destroy the GPU context(s) in free_gpu_resources().
1577     // Pinned buffers are associated with contexts in CUDA.
1578     // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
1579     mdAtoms.reset(nullptr);
1580     globalState.reset(nullptr);
1581     mdModules_.reset(nullptr);   // destruct force providers here as they might also use the GPU
1582
1583     /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
1584     free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
1585     free_gpu(nonbondedDeviceInfo);
1586     free_gpu(pmeDeviceInfo);
1587     done_forcerec(fr, mtop.molblock.size());
1588     sfree(fcd);
1589
1590     if (doMembed)
1591     {
1592         free_membed(membed);
1593     }
1594
1595     /* Does what it says */
1596     print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
1597     walltime_accounting_destroy(walltime_accounting);
1598
1599     // Ensure log file content is written
1600     if (logFileHandle)
1601     {
1602         gmx_fio_flush(logFileHandle);
1603     }
1604
1605     /* Reset FPEs (important for unit tests) by disabling them. Assumes no
1606      * exceptions were enabled before function was called. */
1607     if (bEnableFPE)
1608     {
1609         gmx_fedisableexcept();
1610     }
1611
1612     auto rc = static_cast<int>(gmx_get_stop_condition());
1613
1614 #if GMX_THREAD_MPI
1615     /* we need to join all threads. The sub-threads join when they
1616        exit this function, but the master thread needs to be told to
1617        wait for that. */
1618     if (PAR(cr) && MASTER(cr))
1619     {
1620         tMPI_Finalize();
1621     }
1622 #endif
1623     return rc;
1624 }
1625
1626 Mdrunner::~Mdrunner()
1627 {
1628     // Clean up of the Manager.
1629     // This will end up getting called on every thread-MPI rank, which is unnecessary,
1630     // but okay as long as threads synchronize some time before adding or accessing
1631     // a new set of restraints.
1632     if (restraintManager_)
1633     {
1634         restraintManager_->clear();
1635         GMX_ASSERT(restraintManager_->countRestraints() == 0,
1636                    "restraints added during runner life time should be cleared at runner destruction.");
1637     }
1638 };
1639
1640 void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller,
1641                             const std::string                        &name)
1642 {
1643     GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
1644     // Not sure if this should be logged through the md logger or something else,
1645     // but it is helpful to have some sort of INFO level message sent somewhere.
1646     //    std::cout << "Registering restraint named " << name << std::endl;
1647
1648     // When multiple restraints are used, it may be wasteful to register them separately.
1649     // Maybe instead register an entire Restraint Manager as a force provider.
1650     restraintManager_->addToSpec(std::move(puller),
1651                                  name);
1652 }
1653
1654 Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules)
1655     : mdModules_(std::move(mdModules))
1656 {
1657 }
1658
1659 Mdrunner::Mdrunner(Mdrunner &&) noexcept = default;
1660
1661 //NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
1662 Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
1663
1664 class Mdrunner::BuilderImplementation
1665 {
1666     public:
1667         BuilderImplementation() = delete;
1668         BuilderImplementation(std::unique_ptr<MDModules> mdModules);
1669         ~BuilderImplementation();
1670
1671         BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options,
1672                                                     real                forceWarningThreshold,
1673                                                     StartingBehavior    startingBehavior);
1674
1675         void addDomdec(const DomdecOptions &options);
1676
1677         void addVerletList(int nstlist);
1678
1679         void addReplicaExchange(const ReplicaExchangeParameters &params);
1680
1681         void addMultiSim(gmx_multisim_t* multisim);
1682
1683         void addCommunicationRecord(t_commrec *cr);
1684
1685         void addNonBonded(const char* nbpu_opt);
1686
1687         void addPME(const char* pme_opt_, const char* pme_fft_opt_);
1688
1689         void addBondedTaskAssignment(const char* bonded_opt);
1690
1691         void addUpdateTaskAssignment(const char* update_opt);
1692
1693         void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions);
1694
1695         void addFilenames(ArrayRef <const t_filenm> filenames);
1696
1697         void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
1698
1699         void addLogFile(t_fileio *logFileHandle);
1700
1701         void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
1702
1703         Mdrunner build();
1704
1705     private:
1706
1707         // Default parameters copied from runner.h
1708         // \todo Clarify source(s) of default parameters.
1709
1710         const char* nbpu_opt_          = nullptr;
1711         const char* pme_opt_           = nullptr;
1712         const char* pme_fft_opt_       = nullptr;
1713         const char *bonded_opt_        = nullptr;
1714         const char *update_opt_        = nullptr;
1715
1716         MdrunOptions                          mdrunOptions_;
1717
1718         DomdecOptions                         domdecOptions_;
1719
1720         ReplicaExchangeParameters             replicaExchangeParameters_;
1721
1722         //! Command-line override for the duration of a neighbor list with the Verlet scheme.
1723         int         nstlist_ = 0;
1724
1725         //! Multisim communicator handle.
1726         std::unique_ptr<gmx_multisim_t*>      multisim_ = nullptr;
1727
1728         //! Non-owning communication record (only used when building command-line mdrun)
1729         t_commrec *communicationRecord_ = nullptr;
1730
1731         //! Print a warning if any force is larger than this (in kJ/mol nm).
1732         real forceWarningThreshold_ = -1;
1733
1734         //! Whether the simulation will start afresh, or restart with/without appending.
1735         StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
1736
1737         //! The modules that comprise the functionality of mdrun.
1738         std::unique_ptr<MDModules> mdModules_;
1739
1740         //! \brief Parallelism information.
1741         gmx_hw_opt_t hardwareOptions_;
1742
1743         //! filename options for simulation.
1744         ArrayRef<const t_filenm> filenames_;
1745
1746         /*! \brief Handle to output environment.
1747          *
1748          * \todo gmx_output_env_t needs lifetime management.
1749          */
1750         gmx_output_env_t*    outputEnvironment_ = nullptr;
1751
1752         /*! \brief Non-owning handle to MD log file.
1753          *
1754          * \todo Context should own output facilities for client.
1755          * \todo Improve log file handle management.
1756          * \internal
1757          * Code managing the FILE* relies on the ability to set it to
1758          * nullptr to check whether the filehandle is valid.
1759          */
1760         t_fileio* logFileHandle_ = nullptr;
1761
1762         /*!
1763          * \brief Builder for simulation stop signal handler.
1764          */
1765         std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
1766 };
1767
1768 Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules) :
1769     mdModules_(std::move(mdModules))
1770 {
1771 }
1772
1773 Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
1774
1775 Mdrunner::BuilderImplementation &
1776 Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions    &options,
1777                                                       const real             forceWarningThreshold,
1778                                                       const StartingBehavior startingBehavior)
1779 {
1780     mdrunOptions_          = options;
1781     forceWarningThreshold_ = forceWarningThreshold;
1782     startingBehavior_      = startingBehavior;
1783     return *this;
1784 }
1785
1786 void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options)
1787 {
1788     domdecOptions_ = options;
1789 }
1790
1791 void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
1792 {
1793     nstlist_ = nstlist;
1794 }
1795
1796 void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters &params)
1797 {
1798     replicaExchangeParameters_ = params;
1799 }
1800
1801 void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim)
1802 {
1803     multisim_ = std::make_unique<gmx_multisim_t*>(multisim);
1804 }
1805
1806 void Mdrunner::BuilderImplementation::addCommunicationRecord(t_commrec *cr)
1807 {
1808     communicationRecord_ = cr;
1809 }
1810
1811 Mdrunner Mdrunner::BuilderImplementation::build()
1812 {
1813     auto newRunner = Mdrunner(std::move(mdModules_));
1814
1815     newRunner.mdrunOptions          = mdrunOptions_;
1816     newRunner.pforce                = forceWarningThreshold_;
1817     newRunner.startingBehavior      = startingBehavior_;
1818     newRunner.domdecOptions         = domdecOptions_;
1819
1820     // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
1821     newRunner.hw_opt          = hardwareOptions_;
1822
1823     // No invariant to check. This parameter exists to optionally override other behavior.
1824     newRunner.nstlist_cmdline = nstlist_;
1825
1826     newRunner.replExParams    = replicaExchangeParameters_;
1827
1828     newRunner.filenames = filenames_;
1829
1830     GMX_ASSERT(communicationRecord_, "Bug found. It should not be possible to call build() without a valid communicationRecord_.");
1831     newRunner.cr = communicationRecord_;
1832
1833     if (multisim_)
1834     {
1835         // nullptr is a valid value for the multisim handle, so we don't check the pointed-to pointer.
1836         newRunner.ms = *multisim_;
1837     }
1838     else
1839     {
1840         GMX_THROW(gmx::APIError("MdrunnerBuilder::addMultiSim() is required before build()"));
1841     }
1842
1843     // \todo Clarify ownership and lifetime management for gmx_output_env_t
1844     // \todo Update sanity checking when output environment has clearly specified invariants.
1845     // Initialization and default values for oenv are not well specified in the current version.
1846     if (outputEnvironment_)
1847     {
1848         newRunner.oenv  = outputEnvironment_;
1849     }
1850     else
1851     {
1852         GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()"));
1853     }
1854
1855     newRunner.logFileHandle = logFileHandle_;
1856
1857     if (nbpu_opt_)
1858     {
1859         newRunner.nbpu_opt    = nbpu_opt_;
1860     }
1861     else
1862     {
1863         GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
1864     }
1865
1866     if (pme_opt_ && pme_fft_opt_)
1867     {
1868         newRunner.pme_opt     = pme_opt_;
1869         newRunner.pme_fft_opt = pme_fft_opt_;
1870     }
1871     else
1872     {
1873         GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
1874     }
1875
1876     if (bonded_opt_)
1877     {
1878         newRunner.bonded_opt = bonded_opt_;
1879     }
1880     else
1881     {
1882         GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
1883     }
1884
1885     if (update_opt_)
1886     {
1887         newRunner.update_opt = update_opt_;
1888     }
1889     else
1890     {
1891         GMX_THROW(gmx::APIError("MdrunnerBuilder::addUpdateTaskAssignment() is required before build()  "));
1892     }
1893
1894
1895     newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
1896
1897     if (stopHandlerBuilder_)
1898     {
1899         newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
1900     }
1901     else
1902     {
1903         newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
1904     }
1905
1906     return newRunner;
1907 }
1908
1909 void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
1910 {
1911     nbpu_opt_ = nbpu_opt;
1912 }
1913
1914 void Mdrunner::BuilderImplementation::addPME(const char* pme_opt,
1915                                              const char* pme_fft_opt)
1916 {
1917     pme_opt_     = pme_opt;
1918     pme_fft_opt_ = pme_fft_opt;
1919 }
1920
1921 void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
1922 {
1923     bonded_opt_ = bonded_opt;
1924 }
1925
1926 void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
1927 {
1928     update_opt_ = update_opt;
1929 }
1930
1931 void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
1932 {
1933     hardwareOptions_ = hardwareOptions;
1934 }
1935
1936 void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
1937 {
1938     filenames_ = filenames;
1939 }
1940
1941 void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
1942 {
1943     outputEnvironment_ = outputEnvironment;
1944 }
1945
1946 void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle)
1947 {
1948     logFileHandle_ = logFileHandle;
1949 }
1950
1951 void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
1952 {
1953     stopHandlerBuilder_ = std::move(builder);
1954 }
1955
1956 MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules) :
1957     impl_ {std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules))}
1958 {
1959 }
1960
1961 MdrunnerBuilder::~MdrunnerBuilder() = default;
1962
1963 MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions    &options,
1964                                                       real                   forceWarningThreshold,
1965                                                       const StartingBehavior startingBehavior)
1966 {
1967     impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
1968     return *this;
1969 }
1970
1971 MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options)
1972 {
1973     impl_->addDomdec(options);
1974     return *this;
1975 }
1976
1977 MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist)
1978 {
1979     impl_->addVerletList(nstlist);
1980     return *this;
1981 }
1982
1983 MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters &params)
1984 {
1985     impl_->addReplicaExchange(params);
1986     return *this;
1987 }
1988
1989 MdrunnerBuilder &MdrunnerBuilder::addMultiSim(gmx_multisim_t* multisim)
1990 {
1991     impl_->addMultiSim(multisim);
1992     return *this;
1993 }
1994
1995 MdrunnerBuilder &MdrunnerBuilder::addCommunicationRecord(t_commrec *cr)
1996 {
1997     impl_->addCommunicationRecord(cr);
1998     return *this;
1999 }
2000
2001 MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
2002 {
2003     impl_->addNonBonded(nbpu_opt);
2004     return *this;
2005 }
2006
2007 MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt,
2008                                                     const char* pme_fft_opt)
2009 {
2010     // The builder method may become more general in the future, but in this version,
2011     // parameters for PME electrostatics are both required and the only parameters
2012     // available.
2013     if (pme_opt && pme_fft_opt)
2014     {
2015         impl_->addPME(pme_opt, pme_fft_opt);
2016     }
2017     else
2018     {
2019         GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
2020     }
2021     return *this;
2022 }
2023
2024 MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
2025 {
2026     impl_->addBondedTaskAssignment(bonded_opt);
2027     return *this;
2028 }
2029
2030 MdrunnerBuilder &MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
2031 {
2032     impl_->addUpdateTaskAssignment(update_opt);
2033     return *this;
2034 }
2035
2036 Mdrunner MdrunnerBuilder::build()
2037 {
2038     return impl_->build();
2039 }
2040
2041 MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions)
2042 {
2043     impl_->addHardwareOptions(hardwareOptions);
2044     return *this;
2045 }
2046
2047 MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
2048 {
2049     impl_->addFilenames(filenames);
2050     return *this;
2051 }
2052
2053 MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
2054 {
2055     impl_->addOutputEnvironment(outputEnvironment);
2056     return *this;
2057 }
2058
2059 MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle)
2060 {
2061     impl_->addLogFile(logFileHandle);
2062     return *this;
2063 }
2064
2065 MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
2066 {
2067     impl_->addStopHandlerBuilder(std::move(builder));
2068     return *this;
2069 }
2070
2071 MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default;
2072
2073 MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default;
2074
2075 } // namespace gmx