Add basic interface to run update on GPU
[alexxy/gromacs.git] / src / gromacs / mdrun / md.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the integrator for normal molecular dynamics simulations
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53
54 #include "gromacs/awh/awh.h"
55 #include "gromacs/commandline/filenm.h"
56 #include "gromacs/domdec/collect.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec.h"
59 #include "gromacs/domdec/domdec_network.h"
60 #include "gromacs/domdec/domdec_struct.h"
61 #include "gromacs/domdec/mdsetup.h"
62 #include "gromacs/domdec/partition.h"
63 #include "gromacs/essentialdynamics/edsam.h"
64 #include "gromacs/ewald/pme.h"
65 #include "gromacs/ewald/pme_load_balancing.h"
66 #include "gromacs/fileio/trxio.h"
67 #include "gromacs/gmxlib/network.h"
68 #include "gromacs/gmxlib/nrnb.h"
69 #include "gromacs/gpu_utils/gpu_utils.h"
70 #include "gromacs/imd/imd.h"
71 #include "gromacs/listed_forces/manage_threading.h"
72 #include "gromacs/math/functions.h"
73 #include "gromacs/math/utilities.h"
74 #include "gromacs/math/vec.h"
75 #include "gromacs/math/vectypes.h"
76 #include "gromacs/mdlib/checkpointhandler.h"
77 #include "gromacs/mdlib/compute_io.h"
78 #include "gromacs/mdlib/constr.h"
79 #include "gromacs/mdlib/ebin.h"
80 #include "gromacs/mdlib/enerdata_utils.h"
81 #include "gromacs/mdlib/energyoutput.h"
82 #include "gromacs/mdlib/expanded.h"
83 #include "gromacs/mdlib/force.h"
84 #include "gromacs/mdlib/force_flags.h"
85 #include "gromacs/mdlib/forcerec.h"
86 #include "gromacs/mdlib/md_support.h"
87 #include "gromacs/mdlib/mdatoms.h"
88 #include "gromacs/mdlib/mdoutf.h"
89 #include "gromacs/mdlib/membed.h"
90 #include "gromacs/mdlib/resethandler.h"
91 #include "gromacs/mdlib/sighandler.h"
92 #include "gromacs/mdlib/simulationsignal.h"
93 #include "gromacs/mdlib/stat.h"
94 #include "gromacs/mdlib/stophandler.h"
95 #include "gromacs/mdlib/tgroup.h"
96 #include "gromacs/mdlib/trajectory_writing.h"
97 #include "gromacs/mdlib/update.h"
98 #include "gromacs/mdlib/update_constrain_cuda.h"
99 #include "gromacs/mdlib/vcm.h"
100 #include "gromacs/mdlib/vsite.h"
101 #include "gromacs/mdrunutility/handlerestart.h"
102 #include "gromacs/mdrunutility/multisim.h"
103 #include "gromacs/mdrunutility/printtime.h"
104 #include "gromacs/mdtypes/awh_history.h"
105 #include "gromacs/mdtypes/awh_params.h"
106 #include "gromacs/mdtypes/commrec.h"
107 #include "gromacs/mdtypes/df_history.h"
108 #include "gromacs/mdtypes/energyhistory.h"
109 #include "gromacs/mdtypes/fcdata.h"
110 #include "gromacs/mdtypes/forcerec.h"
111 #include "gromacs/mdtypes/group.h"
112 #include "gromacs/mdtypes/inputrec.h"
113 #include "gromacs/mdtypes/interaction_const.h"
114 #include "gromacs/mdtypes/md_enums.h"
115 #include "gromacs/mdtypes/mdatom.h"
116 #include "gromacs/mdtypes/mdrunoptions.h"
117 #include "gromacs/mdtypes/observableshistory.h"
118 #include "gromacs/mdtypes/pullhistory.h"
119 #include "gromacs/mdtypes/state.h"
120 #include "gromacs/modularsimulator/energyelement.h"
121 #include "gromacs/nbnxm/nbnxm.h"
122 #include "gromacs/pbcutil/mshift.h"
123 #include "gromacs/pbcutil/pbc.h"
124 #include "gromacs/pulling/output.h"
125 #include "gromacs/pulling/pull.h"
126 #include "gromacs/swap/swapcoords.h"
127 #include "gromacs/timing/wallcycle.h"
128 #include "gromacs/timing/walltime_accounting.h"
129 #include "gromacs/topology/atoms.h"
130 #include "gromacs/topology/idef.h"
131 #include "gromacs/topology/mtop_util.h"
132 #include "gromacs/topology/topology.h"
133 #include "gromacs/trajectory/trajectoryframe.h"
134 #include "gromacs/utility/basedefinitions.h"
135 #include "gromacs/utility/cstringutil.h"
136 #include "gromacs/utility/fatalerror.h"
137 #include "gromacs/utility/logger.h"
138 #include "gromacs/utility/real.h"
139 #include "gromacs/utility/smalloc.h"
140
141 #include "legacysimulator.h"
142 #include "replicaexchange.h"
143 #include "shellfc.h"
144
145 #if GMX_FAHCORE
146 #include "corewrap.h"
147 #endif
148
149 using gmx::SimulationSignaller;
150
151 void gmx::LegacySimulator::do_md()
152 {
153     // TODO Historically, the EM and MD "integrators" used different
154     // names for the t_inputrec *parameter, but these must have the
155     // same name, now that it's a member of a struct. We use this ir
156     // alias to avoid a large ripple of nearly useless changes.
157     // t_inputrec is being replaced by IMdpOptionsProvider, so this
158     // will go away eventually.
159     t_inputrec                 *ir   = inputrec;
160     int64_t                     step, step_rel;
161     double                      t, t0 = ir->init_t, lam0[efptNR];
162     gmx_bool                    bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
163     gmx_bool                    bNS, bNStList, bStopCM,
164                                 bFirstStep, bInitStep, bLastStep = FALSE;
165     gmx_bool                    bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
166     gmx_bool                    do_ene, do_log, do_verbose;
167     gmx_bool                    bMasterState;
168     unsigned int                force_flags;
169     tensor                      force_vir     = {{0}}, shake_vir = {{0}}, total_vir = {{0}},
170                                 tmp_vir       = {{0}}, pres = {{0}};
171     int                         i, m;
172     rvec                        mu_tot;
173     matrix                      parrinellorahmanMu, M;
174     gmx_repl_ex_t               repl_ex = nullptr;
175     gmx_localtop_t              top;
176     PaddedHostVector<gmx::RVec> f {};
177     gmx_global_stat_t           gstat;
178     t_graph                    *graph = nullptr;
179     gmx_shellfc_t              *shellfc;
180     gmx_bool                    bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
181     gmx_bool                    bTemp, bPres, bTrotter;
182     real                        dvdl_constr;
183     std::vector<RVec>           cbuf;
184     matrix                      lastbox;
185     int                         lamnew  = 0;
186     /* for FEP */
187     int                         nstfep = 0;
188     double                      cycles;
189     real                        saved_conserved_quantity = 0;
190     real                        last_ekin                = 0;
191     t_extmass                   MassQ;
192     char                        sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
193
194     /* PME load balancing data for GPU kernels */
195     gmx_bool              bPMETune         = FALSE;
196     gmx_bool              bPMETunePrinting = FALSE;
197
198     bool                  bInteractiveMDstep = false;
199
200     /* Domain decomposition could incorrectly miss a bonded
201        interaction, but checking for that requires a global
202        communication stage, which does not otherwise happen in DD
203        code. So we do that alongside the first global energy reduction
204        after a new DD is made. These variables handle whether the
205        check happens, and the result it returns. */
206     bool              shouldCheckNumberOfBondedInteractions = false;
207     int               totalNumberOfBondedInteractions       = -1;
208
209     SimulationSignals signals;
210     // Most global communnication stages don't propagate mdrun
211     // signals, and will use this object to achieve that.
212     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
213
214     if (!mdrunOptions.writeConfout)
215     {
216         // This is on by default, and the main known use case for
217         // turning it off is for convenience in benchmarking, which is
218         // something that should not show up in the general user
219         // interface.
220         GMX_LOG(mdlog.info).asParagraph().
221             appendText("The -noconfout functionality is deprecated, and may be removed in a future version.");
222     }
223
224     /* md-vv uses averaged full step velocities for T-control
225        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
226        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
227     bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
228
229     const bool bRerunMD      = false;
230
231     int        nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
232     bGStatEveryStep = (nstglobalcomm == 1);
233
234     SimulationGroups                  *groups = &top_global->groups;
235
236     std::unique_ptr<EssentialDynamics> ed = nullptr;
237     if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr)
238     {
239         /* Initialize essential dynamics sampling */
240         ed = init_edsam(mdlog,
241                         opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm),
242                         top_global,
243                         ir, cr, constr,
244                         state_global, observablesHistory,
245                         oenv,
246                         startingBehavior);
247     }
248
249     initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
250     Update     upd(ir, deform);
251     const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
252     if (startingBehavior != StartingBehavior::RestartWithAppending)
253     {
254         pleaseCiteCouplingAlgorithms(fplog, *ir);
255     }
256     gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir, top_global, oenv, wcycle,
257                                          StartingBehavior::NewSimulation);
258     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, mdoutf_get_fp_dhdl(outf), false, mdModulesNotifier);
259
260     gstat = global_stat_init(ir);
261
262     /* Check for polarizable models and flexible constraints */
263     shellfc = init_shell_flexcon(fplog,
264                                  top_global, constr ? constr->numFlexibleConstraints() : 0,
265                                  ir->nstcalcenergy, DOMAINDECOMP(cr));
266
267     {
268         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
269         if ((io > 2000) && MASTER(cr))
270         {
271             fprintf(stderr,
272                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
273                     io);
274         }
275     }
276
277     // Local state only becomes valid now.
278     std::unique_ptr<t_state> stateInstance;
279     t_state *                state;
280
281
282     auto mdatoms = mdAtoms->mdatoms();
283
284     std::unique_ptr<UpdateConstrainCuda> integrator;
285
286     if (DOMAINDECOMP(cr))
287     {
288         dd_init_local_top(*top_global, &top);
289
290         stateInstance = std::make_unique<t_state>();
291         state         = stateInstance.get();
292         dd_init_local_state(cr->dd, state_global, state);
293
294         /* Distribute the charge groups over the nodes from the master node */
295         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
296                             state_global, *top_global, ir, imdSession,
297                             pull_work,
298                             state, &f, mdAtoms, &top, fr,
299                             vsite, constr,
300                             nrnb, nullptr, FALSE);
301         shouldCheckNumberOfBondedInteractions = true;
302         upd.setNumAtoms(state->natoms);
303     }
304     else
305     {
306         state_change_natoms(state_global, state_global->natoms);
307         f.resizeWithPadding(state_global->natoms);
308         /* Copy the pointer to the global state */
309         state = state_global;
310
311         /* Generate and initialize new topology */
312         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr,
313                                   &graph, mdAtoms, constr, vsite, shellfc);
314
315         upd.setNumAtoms(state->natoms);
316     }
317
318     if (useGpuForUpdate)
319     {
320         GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
321         GMX_RELEASE_ASSERT(ir->etc != etcNOSEHOOVER, "Nose Hoover temperature coupling is not supported on the GPU.");
322         GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN, "Only Parrinello Rahman pressure control is supported on the GPU.");
323         GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
324         GMX_RELEASE_ASSERT(ed == nullptr, "Essential dynamics is not supported with GPU-based update constraints.");
325         GMX_LOG(mdlog.info).asParagraph().
326             appendText("Updating coordinates on the GPU.");
327         integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global);
328         integrator->set(top.idef, *mdatoms, ekind->ngtc);
329         t_pbc pbc;
330         set_pbc(&pbc, epbcXYZ, state->box);
331         integrator->setPbc(&pbc);
332     }
333
334     if (fr->nbv->useGpu())
335     {
336         changePinningPolicy(&state->x, gmx::PinningPolicy::PinnedIfSupported);
337     }
338
339     // NOTE: The global state is no longer used at this point.
340     // But state_global is still used as temporary storage space for writing
341     // the global state to file and potentially for replica exchange.
342     // (Global topology should persist.)
343
344     update_mdatoms(mdatoms, state->lambda[efptMASS]);
345
346     if (ir->bExpanded)
347     {
348         /* Check nstexpanded here, because the grompp check was broken */
349         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
350         {
351             gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
352         }
353         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation,
354                                ir, state->dfhist);
355     }
356
357     if (MASTER(cr))
358     {
359         EnergyElement::initializeEnergyHistory(
360                 startingBehavior, observablesHistory, &energyOutput);
361     }
362
363     preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
364                            startingBehavior != StartingBehavior::NewSimulation);
365
366     // TODO: Remove this by converting AWH into a ForceProvider
367     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingBehavior != StartingBehavior::NewSimulation,
368                                 shellfc != nullptr,
369                                 opt2fn("-awh", nfile, fnm), pull_work);
370
371     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
372     if (useReplicaExchange && MASTER(cr))
373     {
374         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir,
375                                         replExParams);
376     }
377     /* PME tuning is only supported in the Verlet scheme, with PME for
378      * Coulomb. It is not supported with only LJ PME. */
379     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) &&
380                 !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP);
381
382     pme_load_balancing_t *pme_loadbal      = nullptr;
383     if (bPMETune)
384     {
385         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
386                          *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu(),
387                          &bPMETunePrinting);
388     }
389
390     if (!ir->bContinuation)
391     {
392         if (state->flags & (1U << estV))
393         {
394             auto v = makeArrayRef(state->v);
395             /* Set the velocities of vsites, shells and frozen atoms to zero */
396             for (i = 0; i < mdatoms->homenr; i++)
397             {
398                 if (mdatoms->ptype[i] == eptVSite ||
399                     mdatoms->ptype[i] == eptShell)
400                 {
401                     clear_rvec(v[i]);
402                 }
403                 else if (mdatoms->cFREEZE)
404                 {
405                     for (m = 0; m < DIM; m++)
406                     {
407                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
408                         {
409                             v[i][m] = 0;
410                         }
411                     }
412                 }
413             }
414         }
415
416         if (constr)
417         {
418             /* Constrain the initial coordinates and velocities */
419             do_constrain_first(fplog, constr, ir, mdatoms,
420                                state->natoms,
421                                state->x.arrayRefWithPadding(),
422                                state->v.arrayRefWithPadding(),
423                                state->box, state->lambda[efptBONDED]);
424         }
425         if (vsite)
426         {
427             /* Construct the virtual sites for the initial configuration */
428             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr,
429                              top.idef.iparams, top.idef.il,
430                              fr->ePBC, fr->bMolPBC, cr, state->box);
431         }
432     }
433
434     if (ir->efep != efepNO)
435     {
436         /* Set free energy calculation frequency as the greatest common
437          * denominator of nstdhdl and repl_ex_nst. */
438         nstfep = ir->fepvals->nstdhdl;
439         if (ir->bExpanded)
440         {
441             nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
442         }
443         if (useReplicaExchange)
444         {
445             nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
446         }
447     }
448
449     /* Be REALLY careful about what flags you set here. You CANNOT assume
450      * this is the first step, since we might be restarting from a checkpoint,
451      * and in that case we should not do any modifications to the state.
452      */
453     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
454
455     // When restarting from a checkpoint, it can be appropriate to
456     // initialize ekind from quantities in the checkpoint. Otherwise,
457     // compute_globals must initialize ekind before the simulation
458     // starts/restarts. However, only the master rank knows what was
459     // found in the checkpoint file, so we have to communicate in
460     // order to coordinate the restart.
461     //
462     // TODO Consider removing this communication if/when checkpoint
463     // reading directly follows .tpr reading, because all ranks can
464     // agree on hasReadEkinState at that time.
465     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
466     if (PAR(cr))
467     {
468         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
469     }
470     if (hasReadEkinState)
471     {
472         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
473     }
474
475     unsigned int cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
476                                | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
477                                | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
478                                | (hasReadEkinState ? CGLO_READEKIN : 0));
479
480     bSumEkinhOld = FALSE;
481
482     t_vcm vcm(top_global->groups, *ir);
483     reportComRemovalInfo(fplog, vcm);
484
485     /* To minimize communication, compute_globals computes the COM velocity
486      * and the kinetic energy for the velocities without COM motion removed.
487      * Thus to get the kinetic energy without the COM contribution, we need
488      * to call compute_globals twice.
489      */
490     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
491     {
492         unsigned int cglo_flags_iteration = cglo_flags;
493         if (bStopCM && cgloIteration == 0)
494         {
495             cglo_flags_iteration |= CGLO_STOPCM;
496             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
497         }
498         compute_globals(gstat, cr, ir, fr, ekind,
499                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
500                         mdatoms, nrnb, &vcm,
501                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
502                         constr, &nullSignaller, state->box,
503                         &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration
504                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
505         if (cglo_flags_iteration & CGLO_STOPCM)
506         {
507             /* At initialization, do not pass x with acceleration-correction mode
508              * to avoid (incorrect) correction of the initial coordinates.
509              */
510             rvec *xPtr = nullptr;
511             if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
512             {
513                 xPtr = state->x.rvec_array();
514             }
515             process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
516             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
517         }
518     }
519     checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
520                                     top_global, &top, state->x.rvec_array(), state->box,
521                                     &shouldCheckNumberOfBondedInteractions);
522     if (ir->eI == eiVVAK)
523     {
524         /* a second call to get the half step temperature initialized as well */
525         /* we do the same call as above, but turn the pressure off -- internally to
526            compute_globals, this is recognized as a velocity verlet half-step
527            kinetic energy calculation.  This minimized excess variables, but
528            perhaps loses some logic?*/
529
530         compute_globals(gstat, cr, ir, fr, ekind,
531                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
532                         mdatoms, nrnb, &vcm,
533                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
534                         constr, &nullSignaller, state->box,
535                         nullptr, &bSumEkinhOld,
536                         cglo_flags & ~CGLO_PRESSURE);
537     }
538
539     /* Calculate the initial half step temperature, and save the ekinh_old */
540     if (startingBehavior == StartingBehavior::NewSimulation)
541     {
542         for (i = 0; (i < ir->opts.ngtc); i++)
543         {
544             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
545         }
546     }
547
548     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
549        temperature control */
550     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
551
552     if (MASTER(cr))
553     {
554         if (!ir->bContinuation)
555         {
556             if (constr && ir->eConstrAlg == econtLINCS)
557             {
558                 fprintf(fplog,
559                         "RMS relative constraint deviation after constraining: %.2e\n",
560                         constr->rmsd());
561             }
562             if (EI_STATE_VELOCITY(ir->eI))
563             {
564                 real temp = enerd->term[F_TEMP];
565                 if (ir->eI != eiVV)
566                 {
567                     /* Result of Ekin averaged over velocities of -half
568                      * and +half step, while we only have -half step here.
569                      */
570                     temp *= 2;
571                 }
572                 fprintf(fplog, "Initial temperature: %g K\n", temp);
573             }
574         }
575
576         char tbuf[20];
577         fprintf(stderr, "starting mdrun '%s'\n",
578                 *(top_global->name));
579         if (ir->nsteps >= 0)
580         {
581             sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
582         }
583         else
584         {
585             sprintf(tbuf, "%s", "infinite");
586         }
587         if (ir->init_step > 0)
588         {
589             fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
590                     gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
591                     gmx_step_str(ir->init_step, sbuf2),
592                     ir->init_step*ir->delta_t);
593         }
594         else
595         {
596             fprintf(stderr, "%s steps, %s ps.\n",
597                     gmx_step_str(ir->nsteps, sbuf), tbuf);
598         }
599         fprintf(fplog, "\n");
600     }
601
602     walltime_accounting_start_time(walltime_accounting);
603     wallcycle_start(wcycle, ewcRUN);
604     print_start(fplog, cr, walltime_accounting, "mdrun");
605
606 #if GMX_FAHCORE
607     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
608     int chkpt_ret = fcCheckPointParallel( cr->nodeid,
609                                           NULL, 0);
610     if (chkpt_ret == 0)
611     {
612         gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
613     }
614 #endif
615
616     /***********************************************************
617      *
618      *             Loop over MD steps
619      *
620      ************************************************************/
621
622     bFirstStep       = TRUE;
623     /* Skip the first Nose-Hoover integration when we get the state from tpx */
624     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
625     bSumEkinhOld     = FALSE;
626     bExchanged       = FALSE;
627     bNeedRepartition = FALSE;
628
629     bool simulationsShareState = false;
630     int  nstSignalComm         = nstglobalcomm;
631     {
632         // TODO This implementation of ensemble orientation restraints is nasty because
633         // a user can't just do multi-sim with single-sim orientation restraints.
634         bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
635         bool awhUsesMultiSim         = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
636
637         // Replica exchange, ensemble restraints and AWH need all
638         // simulations to remain synchronized, so they need
639         // checkpoints and stop conditions to act on the same step, so
640         // the propagation of such signals must take place between
641         // simulations, not just within simulations.
642         // TODO: Make algorithm initializers set these flags.
643         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
644
645         if (simulationsShareState)
646         {
647             // Inter-simulation signal communication does not need to happen
648             // often, so we use a minimum of 200 steps to reduce overhead.
649             const int c_minimumInterSimulationSignallingInterval = 200;
650             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm;
651         }
652     }
653
654     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
655                 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
656                 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
657                 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
658
659     auto checkpointHandler = std::make_unique<CheckpointHandler>(
660                 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
661                 simulationsShareState, ir->nstlist == 0, MASTER(cr),
662                 mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period);
663
664     const bool resetCountersIsLocal = true;
665     auto       resetHandler         = std::make_unique<ResetHandler>(
666                 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal,
667                 ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
668                 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
669
670     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
671
672     step     = ir->init_step;
673     step_rel = 0;
674
675     // TODO extract this to new multi-simulation module
676     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
677     {
678         if (!multisim_int_all_are_equal(ms, ir->nsteps))
679         {
680             GMX_LOG(mdlog.warning).appendText(
681                     "Note: The number of steps is not consistent across multi simulations,\n"
682                     "but we are proceeding anyway!");
683         }
684         if (!multisim_int_all_are_equal(ms, ir->init_step))
685         {
686             GMX_LOG(mdlog.warning).appendText(
687                     "Note: The initial step is not consistent across multi simulations,\n"
688                     "but we are proceeding anyway!");
689         }
690     }
691
692     /* and stop now if we should */
693     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
694     while (!bLastStep)
695     {
696
697         /* Determine if this is a neighbor search step */
698         bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
699
700         if (bPMETune && bNStList)
701         {
702             /* PME grid + cut-off optimization with GPUs or PME nodes */
703             pme_loadbal_do(pme_loadbal, cr,
704                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
705                            fplog, mdlog,
706                            *ir, fr, state->box, state->x,
707                            wcycle,
708                            step, step_rel,
709                            &bPMETunePrinting);
710         }
711
712         wallcycle_start(wcycle, ewcSTEP);
713
714         bLastStep = (step_rel == ir->nsteps);
715         t         = t0 + step*ir->delta_t;
716
717         // TODO Refactor this, so that nstfep does not need a default value of zero
718         if (ir->efep != efepNO || ir->bSimTemp)
719         {
720             /* find and set the current lambdas */
721             setCurrentLambdasLocal(step, ir->fepvals, lam0, state->lambda, state->fep_state);
722
723             bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
724             bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
725             bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
726                             && (ir->bExpanded) && (step > 0) &&
727                             (startingBehavior == StartingBehavior::NewSimulation));
728         }
729
730         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep &&
731                      do_per_step(step, replExParams.exchangeInterval));
732
733         if (doSimulatedAnnealing)
734         {
735             update_annealing_target_temp(ir, t, &upd);
736         }
737
738         /* Stop Center of Mass motion */
739         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
740
741         /* Determine whether or not to do Neighbour Searching */
742         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
743
744         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
745
746         /* do_log triggers energy and virial calculation. Because this leads
747          * to different code paths, forces can be different. Thus for exact
748          * continuation we should avoid extra log output.
749          * Note that the || bLastStep can result in non-exact continuation
750          * beyond the last step. But we don't consider that to be an issue.
751          */
752         do_log     =
753             (do_per_step(step, ir->nstlog) ||
754              (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) ||
755              bLastStep);
756         do_verbose = mdrunOptions.verbose &&
757             (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
758
759         if (bNS && !(bFirstStep && ir->bContinuation))
760         {
761             bMasterState = FALSE;
762             /* Correct the new box if it is too skewed */
763             if (inputrecDynamicBox(ir))
764             {
765                 if (correct_box(fplog, step, state->box, graph))
766                 {
767                     bMasterState = TRUE;
768                 }
769             }
770             if (DOMAINDECOMP(cr) && bMasterState)
771             {
772                 dd_collect_state(cr->dd, state, state_global);
773             }
774
775             if (DOMAINDECOMP(cr))
776             {
777                 /* Repartition the domain decomposition */
778                 dd_partition_system(fplog, mdlog, step, cr,
779                                     bMasterState, nstglobalcomm,
780                                     state_global, *top_global, ir, imdSession,
781                                     pull_work,
782                                     state, &f, mdAtoms, &top, fr,
783                                     vsite, constr,
784                                     nrnb, wcycle,
785                                     do_verbose && !bPMETunePrinting);
786                 shouldCheckNumberOfBondedInteractions = true;
787                 upd.setNumAtoms(state->natoms);
788             }
789         }
790
791         if (MASTER(cr) && do_log)
792         {
793             energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
794         }
795
796         if (ir->efep != efepNO)
797         {
798             update_mdatoms(mdatoms, state->lambda[efptMASS]);
799         }
800
801         if (bExchanged)
802         {
803
804             /* We need the kinetic energy at minus the half step for determining
805              * the full step kinetic energy and possibly for T-coupling.*/
806             /* This may not be quite working correctly yet . . . . */
807             compute_globals(gstat, cr, ir, fr, ekind,
808                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
809                             mdatoms, nrnb, &vcm,
810                             wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
811                             constr, &nullSignaller, state->box,
812                             &totalNumberOfBondedInteractions, &bSumEkinhOld,
813                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
814             checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
815                                             top_global, &top, state->x.rvec_array(), state->box,
816                                             &shouldCheckNumberOfBondedInteractions);
817         }
818         clear_mat(force_vir);
819
820         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
821
822         /* Determine the energy and pressure:
823          * at nstcalcenergy steps and at energy output steps (set below).
824          */
825         if (EI_VV(ir->eI) && (!bInitStep))
826         {
827             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
828             bCalcVir      = bCalcEnerStep ||
829                 (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
830         }
831         else
832         {
833             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
834             bCalcVir      = bCalcEnerStep ||
835                 (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
836         }
837         bCalcEner = bCalcEnerStep;
838
839         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
840
841         if (do_ene || do_log || bDoReplEx)
842         {
843             bCalcVir  = TRUE;
844             bCalcEner = TRUE;
845         }
846
847         /* Do we need global communication ? */
848         bGStat = (bCalcVir || bCalcEner || bStopCM ||
849                   do_per_step(step, nstglobalcomm) ||
850                   (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
851
852         force_flags = (GMX_FORCE_STATECHANGED |
853                        ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) |
854                        GMX_FORCE_ALLFORCES |
855                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
856                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
857                        (bDoFEP ? GMX_FORCE_DHDL : 0)
858                        );
859
860         if (shellfc)
861         {
862             /* Now is the time to relax the shells */
863             relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose,
864                                 enforcedRotation, step,
865                                 ir, imdSession, pull_work, bNS, force_flags, &top,
866                                 constr, enerd, fcd,
867                                 state->natoms,
868                                 state->x.arrayRefWithPadding(),
869                                 state->v.arrayRefWithPadding(),
870                                 state->box,
871                                 state->lambda,
872                                 &state->hist,
873                                 f.arrayRefWithPadding(), force_vir, mdatoms,
874                                 nrnb, wcycle, graph,
875                                 shellfc, fr, runScheduleWork, t, mu_tot,
876                                 vsite,
877                                 ddBalanceRegionHandler);
878         }
879         else
880         {
881             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated
882                (or the AWH update will be performed twice for one step when continuing). It would be best to
883                call this update function from do_md_trajectory_writing but that would occur after do_force.
884                One would have to divide the update_awh function into one function applying the AWH force
885                and one doing the AWH bias update. The update AWH bias function could then be called after
886                do_md_trajectory_writing (then containing update_awh_history).
887                The checkpointing will in the future probably moved to the start of the md loop which will
888                rid of this issue. */
889             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
890             {
891                 awh->updateHistory(state_global->awhHistory.get());
892             }
893
894             /* The coordinates (x) are shifted (to get whole molecules)
895              * in do_force.
896              * This is parallellized as well, and does communication too.
897              * Check comments in sim_util.c
898              */
899             do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession,
900                      pull_work,
901                      step, nrnb, wcycle, &top,
902                      state->box, state->x.arrayRefWithPadding(), &state->hist,
903                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
904                      state->lambda, graph,
905                      fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
906                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
907                      ddBalanceRegionHandler);
908         }
909
910         // VV integrators do not need the following velocity half step
911         // if it is the first step after starting from a checkpoint.
912         // That is, the half step is needed on all other steps, and
913         // also the first step when starting from a .tpr file.
914         if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
915         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
916         {
917             rvec *vbuf = nullptr;
918
919             wallcycle_start(wcycle, ewcUPDATE);
920             if (ir->eI == eiVV && bInitStep)
921             {
922                 /* if using velocity verlet with full time step Ekin,
923                  * take the first half step only to compute the
924                  * virial for the first step. From there,
925                  * revert back to the initial coordinates
926                  * so that the input is actually the initial step.
927                  */
928                 snew(vbuf, state->natoms);
929                 copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */
930             }
931             else
932             {
933                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
934                 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
935             }
936
937             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
938                           ekind, M, &upd, etrtVELOCITY1,
939                           cr, constr);
940
941             wallcycle_stop(wcycle, ewcUPDATE);
942             constrain_velocities(step, nullptr,
943                                  state,
944                                  shake_vir,
945                                  constr,
946                                  bCalcVir, do_log, do_ene);
947             wallcycle_start(wcycle, ewcUPDATE);
948             /* if VV, compute the pressure and constraints */
949             /* For VV2, we strictly only need this if using pressure
950              * control, but we really would like to have accurate pressures
951              * printed out.
952              * Think about ways around this in the future?
953              * For now, keep this choice in comments.
954              */
955             /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
956             /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
957             bPres = TRUE;
958             bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
959             if (bCalcEner && ir->eI == eiVVAK)
960             {
961                 bSumEkinhOld = TRUE;
962             }
963             /* for vv, the first half of the integration actually corresponds to the previous step.
964                So we need information from the last step in the first half of the integration */
965             if (bGStat || do_per_step(step-1, nstglobalcomm))
966             {
967                 wallcycle_stop(wcycle, ewcUPDATE);
968                 compute_globals(gstat, cr, ir, fr, ekind,
969                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
970                                 mdatoms, nrnb, &vcm,
971                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
972                                 constr, &nullSignaller, state->box,
973                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
974                                 (bGStat ? CGLO_GSTAT : 0)
975                                 | (bCalcEner ? CGLO_ENERGY : 0)
976                                 | (bTemp ? CGLO_TEMPERATURE : 0)
977                                 | (bPres ? CGLO_PRESSURE : 0)
978                                 | (bPres ? CGLO_CONSTRAINT : 0)
979                                 | (bStopCM ? CGLO_STOPCM : 0)
980                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
981                                 | CGLO_SCALEEKIN
982                                 );
983                 /* explanation of above:
984                    a) We compute Ekin at the full time step
985                    if 1) we are using the AveVel Ekin, and it's not the
986                    initial step, or 2) if we are using AveEkin, but need the full
987                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
988                    b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
989                    EkinAveVel because it's needed for the pressure */
990                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
991                                                 top_global, &top, state->x.rvec_array(), state->box,
992                                                 &shouldCheckNumberOfBondedInteractions);
993                 if (bStopCM)
994                 {
995                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
996                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
997                 }
998                 wallcycle_start(wcycle, ewcUPDATE);
999             }
1000             /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1001             if (!bInitStep)
1002             {
1003                 if (bTrotter)
1004                 {
1005                     m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
1006                     trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
1007
1008                     /* TODO This is only needed when we're about to write
1009                      * a checkpoint, because we use it after the restart
1010                      * (in a kludge?). But what should we be doing if
1011                      * the startingBehavior is NewSimulation or bInitStep are true? */
1012                     if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1013                     {
1014                         copy_mat(shake_vir, state->svir_prev);
1015                         copy_mat(force_vir, state->fvir_prev);
1016                     }
1017                     if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1018                     {
1019                         /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1020                         enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1021                         enerd->term[F_EKIN] = trace(ekind->ekin);
1022                     }
1023                 }
1024                 else if (bExchanged)
1025                 {
1026                     wallcycle_stop(wcycle, ewcUPDATE);
1027                     /* We need the kinetic energy at minus the half step for determining
1028                      * the full step kinetic energy and possibly for T-coupling.*/
1029                     /* This may not be quite working correctly yet . . . . */
1030                     compute_globals(gstat, cr, ir, fr, ekind,
1031                                     state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1032                                     mdatoms, nrnb, &vcm,
1033                                     wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
1034                                     constr, &nullSignaller, state->box,
1035                                     nullptr, &bSumEkinhOld,
1036                                     CGLO_GSTAT | CGLO_TEMPERATURE);
1037                     wallcycle_start(wcycle, ewcUPDATE);
1038                 }
1039             }
1040             /* if it's the initial step, we performed this first step just to get the constraint virial */
1041             if (ir->eI == eiVV && bInitStep)
1042             {
1043                 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1044                 sfree(vbuf);
1045             }
1046             wallcycle_stop(wcycle, ewcUPDATE);
1047         }
1048
1049         /* compute the conserved quantity */
1050         if (EI_VV(ir->eI))
1051         {
1052             saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1053             if (ir->eI == eiVV)
1054             {
1055                 last_ekin = enerd->term[F_EKIN];
1056             }
1057             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1058             {
1059                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1060             }
1061             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1062             if (ir->efep != efepNO)
1063             {
1064                 sum_dhdl(enerd, state->lambda, *ir->fepvals);
1065             }
1066         }
1067
1068         /* ########  END FIRST UPDATE STEP  ############## */
1069         /* ########  If doing VV, we now have v(dt) ###### */
1070         if (bDoExpanded)
1071         {
1072             /* perform extended ensemble sampling in lambda - we don't
1073                actually move to the new state before outputting
1074                statistics, but if performing simulated tempering, we
1075                do update the velocities and the tau_t. */
1076
1077             lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms);
1078             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1079             if (MASTER(cr))
1080             {
1081                 copy_df_history(state_global->dfhist, state->dfhist);
1082             }
1083         }
1084
1085         /* Now we have the energies and forces corresponding to the
1086          * coordinates at time t. We must output all of this before
1087          * the update.
1088          */
1089         do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
1090                                  ir, state, state_global, observablesHistory,
1091                                  top_global, fr,
1092                                  outf, energyOutput, ekind, f,
1093                                  checkpointHandler->isCheckpointingStep(),
1094                                  bRerunMD, bLastStep,
1095                                  mdrunOptions.writeConfout,
1096                                  bSumEkinhOld);
1097         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1098         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1099
1100         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1101         if (startingBehavior != StartingBehavior::NewSimulation &&
1102             (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1103         {
1104             copy_mat(state->svir_prev, shake_vir);
1105             copy_mat(state->fvir_prev, force_vir);
1106         }
1107
1108         stopHandler->setSignal();
1109         resetHandler->setSignal(walltime_accounting);
1110
1111         if (bGStat || !PAR(cr))
1112         {
1113             /* In parallel we only have to check for checkpointing in steps
1114              * where we do global communication,
1115              *  otherwise the other nodes don't know.
1116              */
1117             checkpointHandler->setSignal(walltime_accounting);
1118         }
1119
1120         /* #########   START SECOND UPDATE STEP ################# */
1121
1122         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
1123            in preprocessing */
1124
1125         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1126         {
1127             gmx_bool bIfRandomize;
1128             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1129             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1130             if (constr && bIfRandomize)
1131             {
1132                 constrain_velocities(step, nullptr,
1133                                      state,
1134                                      tmp_vir,
1135                                      constr,
1136                                      bCalcVir, do_log, do_ene);
1137             }
1138         }
1139         /* Box is changed in update() when we do pressure coupling,
1140          * but we should still use the old box for energy corrections and when
1141          * writing it to the energy file, so it matches the trajectory files for
1142          * the same timestep above. Make a copy in a separate array.
1143          */
1144         copy_mat(state->box, lastbox);
1145
1146         dvdl_constr = 0;
1147
1148         wallcycle_start(wcycle, ewcUPDATE);
1149         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1150         if (bTrotter)
1151         {
1152             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1153             /* We can only do Berendsen coupling after we have summed
1154              * the kinetic energy or virial. Since the happens
1155              * in global_state after update, we should only do it at
1156              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1157              */
1158         }
1159         else
1160         {
1161             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1162             update_pcouple_before_coordinates(fplog, step, ir, state,
1163                                               parrinellorahmanMu, M,
1164                                               bInitStep);
1165         }
1166
1167         if (EI_VV(ir->eI))
1168         {
1169             /* velocity half-step update */
1170             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1171                           ekind, M, &upd, etrtVELOCITY2,
1172                           cr, constr);
1173         }
1174
1175         /* Above, initialize just copies ekinh into ekin,
1176          * it doesn't copy position (for VV),
1177          * and entire integrator for MD.
1178          */
1179
1180         if (ir->eI == eiVVAK)
1181         {
1182             cbuf.resize(state->x.size());
1183             std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1184         }
1185
1186         if (useGpuForUpdate)
1187         {
1188             if (bNS)
1189             {
1190                 integrator->set(top.idef, *mdatoms, ekind->ngtc);
1191                 t_pbc pbc;
1192                 set_pbc(&pbc, epbcXYZ, state->box);
1193                 integrator->setPbc(&pbc);
1194             }
1195             integrator->copyCoordinatesToGpu(state->x.rvec_array());
1196             integrator->copyVelocitiesToGpu(state->v.rvec_array());
1197             integrator->copyForcesToGpu(as_rvec_array(f.data()));
1198
1199             bool doTempCouple     = (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1200             bool doPressureCouple = (ir->epc == epcPARRINELLORAHMAN && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1201
1202             // This applies Leap-Frog, LINCS and SETTLE in succession
1203             integrator->integrate(ir->delta_t, true, bCalcVir, shake_vir,
1204                                   doTempCouple, ekind->tcstat,
1205                                   doPressureCouple, ir->nstpcouple*ir->delta_t, M);
1206
1207             integrator->copyCoordinatesFromGpu(state->x.rvec_array());
1208             integrator->copyVelocitiesFromGpu(state->v.rvec_array());
1209         }
1210         else
1211         {
1212             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1213                           ekind, M, &upd, etrtPOSITION, cr, constr);
1214
1215             wallcycle_stop(wcycle, ewcUPDATE);
1216
1217             constrain_coordinates(step, &dvdl_constr, state,
1218                                   shake_vir,
1219                                   &upd, constr,
1220                                   bCalcVir, do_log, do_ene);
1221
1222             update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
1223                                   cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
1224             finish_update(ir, mdatoms,
1225                           state, graph,
1226                           nrnb, wcycle, &upd, constr);
1227         }
1228
1229         if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1230         {
1231             updatePrevStepPullCom(pull_work, state);
1232         }
1233
1234         if (ir->eI == eiVVAK)
1235         {
1236             /* erase F_EKIN and F_TEMP here? */
1237             /* just compute the kinetic energy at the half step to perform a trotter step */
1238             compute_globals(gstat, cr, ir, fr, ekind,
1239                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1240                             mdatoms, nrnb, &vcm,
1241                             wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1242                             constr, &nullSignaller, lastbox,
1243                             nullptr, &bSumEkinhOld,
1244                             (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
1245                             );
1246             wallcycle_start(wcycle, ewcUPDATE);
1247             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1248             /* now we know the scaling, we can compute the positions again */
1249             std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1250
1251             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1252                           ekind, M, &upd, etrtPOSITION, cr, constr);
1253             wallcycle_stop(wcycle, ewcUPDATE);
1254
1255             /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1256             /* are the small terms in the shake_vir here due
1257              * to numerical errors, or are they important
1258              * physically? I'm thinking they are just errors, but not completely sure.
1259              * For now, will call without actually constraining, constr=NULL*/
1260             finish_update(ir, mdatoms,
1261                           state, graph,
1262                           nrnb, wcycle, &upd, nullptr);
1263         }
1264         if (EI_VV(ir->eI))
1265         {
1266             /* this factor or 2 correction is necessary
1267                because half of the constraint force is removed
1268                in the vv step, so we have to double it.  See
1269                the Redmine issue #1255.  It is not yet clear
1270                if the factor of 2 is exact, or just a very
1271                good approximation, and this will be
1272                investigated.  The next step is to see if this
1273                can be done adding a dhdl contribution from the
1274                rattle step, but this is somewhat more
1275                complicated with the current code. Will be
1276                investigated, hopefully for 4.6.3. However,
1277                this current solution is much better than
1278                having it completely wrong.
1279              */
1280             enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
1281         }
1282         else
1283         {
1284             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1285         }
1286
1287         if (vsite != nullptr)
1288         {
1289             wallcycle_start(wcycle, ewcVSITECONSTR);
1290             if (graph != nullptr)
1291             {
1292                 shift_self(graph, state->box, state->x.rvec_array());
1293             }
1294             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1295                              top.idef.iparams, top.idef.il,
1296                              fr->ePBC, fr->bMolPBC, cr, state->box);
1297
1298             if (graph != nullptr)
1299             {
1300                 unshift_self(graph, state->box, state->x.rvec_array());
1301             }
1302             wallcycle_stop(wcycle, ewcVSITECONSTR);
1303         }
1304
1305         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1306         /* With Leap-Frog we can skip compute_globals at
1307          * non-communication steps, but we need to calculate
1308          * the kinetic energy one step before communication.
1309          */
1310         {
1311             // Organize to do inter-simulation signalling on steps if
1312             // and when algorithms require it.
1313             bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1314
1315             if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
1316             {
1317                 // Since we're already communicating at this step, we
1318                 // can propagate intra-simulation signals. Note that
1319                 // check_nstglobalcomm has the responsibility for
1320                 // choosing the value of nstglobalcomm that is one way
1321                 // bGStat becomes true, so we can't get into a
1322                 // situation where e.g. checkpointing can't be
1323                 // signalled.
1324                 bool                doIntraSimSignal = true;
1325                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1326
1327                 compute_globals(gstat, cr, ir, fr, ekind,
1328                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1329                                 mdatoms, nrnb, &vcm,
1330                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1331                                 constr, &signaller,
1332                                 lastbox,
1333                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1334                                 (bGStat ? CGLO_GSTAT : 0)
1335                                 | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1336                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1337                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1338                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
1339                                 | CGLO_CONSTRAINT
1340                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1341                                 );
1342                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1343                                                 top_global, &top, state->x.rvec_array(), state->box,
1344                                                 &shouldCheckNumberOfBondedInteractions);
1345                 if (!EI_VV(ir->eI) && bStopCM)
1346                 {
1347                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1348                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1349                 }
1350             }
1351         }
1352
1353         /* #############  END CALC EKIN AND PRESSURE ################# */
1354
1355         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1356            the virial that should probably be addressed eventually. state->veta has better properies,
1357            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1358            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1359
1360         if (ir->efep != efepNO && !EI_VV(ir->eI))
1361         {
1362             /* Sum up the foreign energy and dhdl terms for md and sd.
1363                Currently done every step so that dhdl is correct in the .edr */
1364             sum_dhdl(enerd, state->lambda, *ir->fepvals);
1365         }
1366
1367         update_pcouple_after_coordinates(fplog, step, ir, mdatoms,
1368                                          pres, force_vir, shake_vir,
1369                                          parrinellorahmanMu,
1370                                          state, nrnb, &upd);
1371
1372         /* ################# END UPDATE STEP 2 ################# */
1373         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1374
1375         /* The coordinates (x) were unshifted in update */
1376         if (!bGStat)
1377         {
1378             /* We will not sum ekinh_old,
1379              * so signal that we still have to do it.
1380              */
1381             bSumEkinhOld = TRUE;
1382         }
1383
1384         if (bCalcEner)
1385         {
1386             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1387
1388             /* use the directly determined last velocity, not actually the averaged half steps */
1389             if (bTrotter && ir->eI == eiVV)
1390             {
1391                 enerd->term[F_EKIN] = last_ekin;
1392             }
1393             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1394
1395             if (integratorHasConservedEnergyQuantity(ir))
1396             {
1397                 if (EI_VV(ir->eI))
1398                 {
1399                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1400                 }
1401                 else
1402                 {
1403                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1404                 }
1405             }
1406             /* #########  END PREPARING EDR OUTPUT  ###########  */
1407         }
1408
1409         /* Output stuff */
1410         if (MASTER(cr))
1411         {
1412             if (fplog && do_log && bDoExpanded)
1413             {
1414                 /* only needed if doing expanded ensemble */
1415                 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr,
1416                                           state_global->dfhist, state->fep_state, ir->nstlog, step);
1417             }
1418             if (bCalcEner)
1419             {
1420                 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep,
1421                                                  t, mdatoms->tmass, enerd, state,
1422                                                  ir->fepvals, ir->expandedvals, lastbox,
1423                                                  shake_vir, force_vir, total_vir, pres,
1424                                                  ekind, mu_tot, constr);
1425             }
1426             else
1427             {
1428                 energyOutput.recordNonEnergyStep();
1429             }
1430
1431             gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
1432             gmx_bool do_or  = do_per_step(step, ir->nstorireout);
1433
1434             if (doSimulatedAnnealing)
1435             {
1436                 energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1437             }
1438             if (do_log || do_ene || do_dr || do_or)
1439             {
1440                 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1441                                                    do_log ? fplog : nullptr,
1442                                                    step, t,
1443                                                    fcd, awh.get());
1444             }
1445
1446             if (ir->bPull)
1447             {
1448                 pull_print_output(pull_work, step, t);
1449             }
1450
1451             if (do_per_step(step, ir->nstlog))
1452             {
1453                 if (fflush(fplog) != 0)
1454                 {
1455                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1456                 }
1457             }
1458         }
1459         if (bDoExpanded)
1460         {
1461             /* Have to do this part _after_ outputting the logfile and the edr file */
1462             /* Gets written into the state at the beginning of next loop*/
1463             state->fep_state = lamnew;
1464         }
1465         /* Print the remaining wall clock time for the run */
1466         if (isMasterSimMasterRank(ms, MASTER(cr)) &&
1467             (do_verbose || gmx_got_usr_signal()) &&
1468             !bPMETunePrinting)
1469         {
1470             if (shellfc)
1471             {
1472                 fprintf(stderr, "\n");
1473             }
1474             print_time(stderr, walltime_accounting, step, ir, cr);
1475         }
1476
1477         /* Ion/water position swapping.
1478          * Not done in last step since trajectory writing happens before this call
1479          * in the MD loop and exchanges would be lost anyway. */
1480         bNeedRepartition = FALSE;
1481         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
1482             do_per_step(step, ir->swap->nstswap))
1483         {
1484             bNeedRepartition = do_swapcoords(cr, step, t, ir, swap, wcycle,
1485                                              as_rvec_array(state->x.data()),
1486                                              state->box,
1487                                              MASTER(cr) && mdrunOptions.verbose,
1488                                              bRerunMD);
1489
1490             if (bNeedRepartition && DOMAINDECOMP(cr))
1491             {
1492                 dd_collect_state(cr->dd, state, state_global);
1493             }
1494         }
1495
1496         /* Replica exchange */
1497         bExchanged = FALSE;
1498         if (bDoReplEx)
1499         {
1500             bExchanged = replica_exchange(fplog, cr, ms, repl_ex,
1501                                           state_global, enerd,
1502                                           state, step, t);
1503         }
1504
1505         if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
1506         {
1507             dd_partition_system(fplog, mdlog, step, cr, TRUE, 1,
1508                                 state_global, *top_global, ir, imdSession,
1509                                 pull_work,
1510                                 state, &f, mdAtoms, &top, fr,
1511                                 vsite, constr,
1512                                 nrnb, wcycle, FALSE);
1513             shouldCheckNumberOfBondedInteractions = true;
1514             upd.setNumAtoms(state->natoms);
1515         }
1516
1517         bFirstStep             = FALSE;
1518         bInitStep              = FALSE;
1519
1520         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1521         /* With all integrators, except VV, we need to retain the pressure
1522          * at the current step for coupling at the next step.
1523          */
1524         if ((state->flags & (1U<<estPRES_PREV)) &&
1525             (bGStatEveryStep ||
1526              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1527         {
1528             /* Store the pressure in t_state for pressure coupling
1529              * at the next MD step.
1530              */
1531             copy_mat(pres, state->pres_prev);
1532         }
1533
1534         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1535
1536         if ( (membed != nullptr) && (!bLastStep) )
1537         {
1538             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1539         }
1540
1541         cycles = wallcycle_stop(wcycle, ewcSTEP);
1542         if (DOMAINDECOMP(cr) && wcycle)
1543         {
1544             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1545         }
1546
1547         /* increase the MD step number */
1548         step++;
1549         step_rel++;
1550
1551         resetHandler->resetCounters(
1552                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(),
1553                 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1554
1555         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1556         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1557
1558     }
1559     /* End of main MD loop */
1560
1561     /* Closing TNG files can include compressing data. Therefore it is good to do that
1562      * before stopping the time measurements. */
1563     mdoutf_tng_close(outf);
1564
1565     /* Stop measuring walltime */
1566     walltime_accounting_end_time(walltime_accounting);
1567
1568     if (!thisRankHasDuty(cr, DUTY_PME))
1569     {
1570         /* Tell the PME only node to finish */
1571         gmx_pme_send_finish(cr);
1572     }
1573
1574     if (MASTER(cr))
1575     {
1576         if (ir->nstcalcenergy > 0)
1577         {
1578             energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1579             energyOutput.printAverages(fplog, groups);
1580         }
1581     }
1582     done_mdoutf(outf);
1583
1584     if (bPMETune)
1585     {
1586         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1587     }
1588
1589     done_shellfc(fplog, shellfc, step_rel);
1590
1591     if (useReplicaExchange && MASTER(cr))
1592     {
1593         print_replica_exchange_statistics(fplog, repl_ex);
1594     }
1595
1596     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1597
1598     global_stat_destroy(gstat);
1599
1600 }