7bf96f6292db6a42f6055bdb655e5c1cb93466a2
[alexxy/gromacs.git] / src / gromacs / mdrun / md.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the integrator for normal molecular dynamics simulations
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53
54 #include "gromacs/awh/awh.h"
55 #include "gromacs/commandline/filenm.h"
56 #include "gromacs/domdec/collect.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec.h"
59 #include "gromacs/domdec/domdec_network.h"
60 #include "gromacs/domdec/domdec_struct.h"
61 #include "gromacs/domdec/mdsetup.h"
62 #include "gromacs/domdec/partition.h"
63 #include "gromacs/essentialdynamics/edsam.h"
64 #include "gromacs/ewald/pme.h"
65 #include "gromacs/ewald/pme_load_balancing.h"
66 #include "gromacs/fileio/trxio.h"
67 #include "gromacs/gmxlib/network.h"
68 #include "gromacs/gmxlib/nrnb.h"
69 #include "gromacs/gpu_utils/gpu_utils.h"
70 #include "gromacs/imd/imd.h"
71 #include "gromacs/listed_forces/manage_threading.h"
72 #include "gromacs/math/functions.h"
73 #include "gromacs/math/utilities.h"
74 #include "gromacs/math/vec.h"
75 #include "gromacs/math/vectypes.h"
76 #include "gromacs/mdlib/checkpointhandler.h"
77 #include "gromacs/mdlib/compute_io.h"
78 #include "gromacs/mdlib/constr.h"
79 #include "gromacs/mdlib/ebin.h"
80 #include "gromacs/mdlib/enerdata_utils.h"
81 #include "gromacs/mdlib/energyoutput.h"
82 #include "gromacs/mdlib/expanded.h"
83 #include "gromacs/mdlib/force.h"
84 #include "gromacs/mdlib/force_flags.h"
85 #include "gromacs/mdlib/forcerec.h"
86 #include "gromacs/mdlib/md_support.h"
87 #include "gromacs/mdlib/mdatoms.h"
88 #include "gromacs/mdlib/mdoutf.h"
89 #include "gromacs/mdlib/membed.h"
90 #include "gromacs/mdlib/resethandler.h"
91 #include "gromacs/mdlib/sighandler.h"
92 #include "gromacs/mdlib/simulationsignal.h"
93 #include "gromacs/mdlib/stat.h"
94 #include "gromacs/mdlib/stophandler.h"
95 #include "gromacs/mdlib/tgroup.h"
96 #include "gromacs/mdlib/trajectory_writing.h"
97 #include "gromacs/mdlib/update.h"
98 #include "gromacs/mdlib/update_constrain_cuda.h"
99 #include "gromacs/mdlib/vcm.h"
100 #include "gromacs/mdlib/vsite.h"
101 #include "gromacs/mdrunutility/handlerestart.h"
102 #include "gromacs/mdrunutility/multisim.h"
103 #include "gromacs/mdrunutility/printtime.h"
104 #include "gromacs/mdtypes/awh_history.h"
105 #include "gromacs/mdtypes/awh_params.h"
106 #include "gromacs/mdtypes/commrec.h"
107 #include "gromacs/mdtypes/df_history.h"
108 #include "gromacs/mdtypes/energyhistory.h"
109 #include "gromacs/mdtypes/fcdata.h"
110 #include "gromacs/mdtypes/forcerec.h"
111 #include "gromacs/mdtypes/group.h"
112 #include "gromacs/mdtypes/inputrec.h"
113 #include "gromacs/mdtypes/interaction_const.h"
114 #include "gromacs/mdtypes/md_enums.h"
115 #include "gromacs/mdtypes/mdatom.h"
116 #include "gromacs/mdtypes/mdrunoptions.h"
117 #include "gromacs/mdtypes/observableshistory.h"
118 #include "gromacs/mdtypes/pullhistory.h"
119 #include "gromacs/mdtypes/state.h"
120 #include "gromacs/nbnxm/nbnxm.h"
121 #include "gromacs/pbcutil/mshift.h"
122 #include "gromacs/pbcutil/pbc.h"
123 #include "gromacs/pulling/output.h"
124 #include "gromacs/pulling/pull.h"
125 #include "gromacs/swap/swapcoords.h"
126 #include "gromacs/timing/wallcycle.h"
127 #include "gromacs/timing/walltime_accounting.h"
128 #include "gromacs/topology/atoms.h"
129 #include "gromacs/topology/idef.h"
130 #include "gromacs/topology/mtop_util.h"
131 #include "gromacs/topology/topology.h"
132 #include "gromacs/trajectory/trajectoryframe.h"
133 #include "gromacs/utility/basedefinitions.h"
134 #include "gromacs/utility/cstringutil.h"
135 #include "gromacs/utility/fatalerror.h"
136 #include "gromacs/utility/logger.h"
137 #include "gromacs/utility/real.h"
138 #include "gromacs/utility/smalloc.h"
139
140 #include "legacysimulator.h"
141 #include "replicaexchange.h"
142 #include "shellfc.h"
143
144 #if GMX_FAHCORE
145 #include "corewrap.h"
146 #endif
147
148 using gmx::SimulationSignaller;
149
150 //! Whether the GPU versions of Leap-Frog integrator and LINCS and SETTLE constraints
151 static const bool c_useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
152
153 void gmx::LegacySimulator::do_md()
154 {
155     // TODO Historically, the EM and MD "integrators" used different
156     // names for the t_inputrec *parameter, but these must have the
157     // same name, now that it's a member of a struct. We use this ir
158     // alias to avoid a large ripple of nearly useless changes.
159     // t_inputrec is being replaced by IMdpOptionsProvider, so this
160     // will go away eventually.
161     t_inputrec             *ir   = inputrec;
162     int64_t                 step, step_rel;
163     double                  t, t0 = ir->init_t, lam0[efptNR];
164     gmx_bool                bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
165     gmx_bool                bNS, bNStList, bStopCM,
166                             bFirstStep, bInitStep, bLastStep = FALSE;
167     gmx_bool                bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
168     gmx_bool                do_ene, do_log, do_verbose;
169     gmx_bool                bMasterState;
170     unsigned int            force_flags;
171     tensor                  force_vir = {{0}}, shake_vir = {{0}}, total_vir = {{0}},
172                             tmp_vir   = {{0}}, pres = {{0}};
173     int                     i, m;
174     rvec                    mu_tot;
175     matrix                  parrinellorahmanMu, M;
176     gmx_repl_ex_t           repl_ex = nullptr;
177     gmx_localtop_t          top;
178     PaddedVector<gmx::RVec> f {};
179     gmx_global_stat_t       gstat;
180     t_graph                *graph = nullptr;
181     gmx_shellfc_t          *shellfc;
182     gmx_bool                bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
183     gmx_bool                bTemp, bPres, bTrotter;
184     real                    dvdl_constr;
185     std::vector<RVec>       cbuf;
186     matrix                  lastbox;
187     int                     lamnew  = 0;
188     /* for FEP */
189     int                     nstfep = 0;
190     double                  cycles;
191     real                    saved_conserved_quantity = 0;
192     real                    last_ekin                = 0;
193     t_extmass               MassQ;
194     char                    sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
195
196     /* PME load balancing data for GPU kernels */
197     gmx_bool              bPMETune         = FALSE;
198     gmx_bool              bPMETunePrinting = FALSE;
199
200     bool                  bInteractiveMDstep = false;
201
202     /* Domain decomposition could incorrectly miss a bonded
203        interaction, but checking for that requires a global
204        communication stage, which does not otherwise happen in DD
205        code. So we do that alongside the first global energy reduction
206        after a new DD is made. These variables handle whether the
207        check happens, and the result it returns. */
208     bool              shouldCheckNumberOfBondedInteractions = false;
209     int               totalNumberOfBondedInteractions       = -1;
210
211     SimulationSignals signals;
212     // Most global communnication stages don't propagate mdrun
213     // signals, and will use this object to achieve that.
214     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
215
216     if (!mdrunOptions.writeConfout)
217     {
218         // This is on by default, and the main known use case for
219         // turning it off is for convenience in benchmarking, which is
220         // something that should not show up in the general user
221         // interface.
222         GMX_LOG(mdlog.info).asParagraph().
223             appendText("The -noconfout functionality is deprecated, and may be removed in a future version.");
224     }
225
226     /* md-vv uses averaged full step velocities for T-control
227        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
228        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
229     bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
230
231     const bool bRerunMD      = false;
232
233     int        nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
234     bGStatEveryStep = (nstglobalcomm == 1);
235
236     SimulationGroups                  *groups = &top_global->groups;
237
238     std::unique_ptr<EssentialDynamics> ed = nullptr;
239     if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr)
240     {
241         /* Initialize essential dynamics sampling */
242         ed = init_edsam(mdlog,
243                         opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm),
244                         top_global,
245                         ir, cr, constr,
246                         state_global, observablesHistory,
247                         oenv,
248                         startingBehavior);
249     }
250
251     initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
252     Update upd(ir, deform);
253     bool   doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
254     if (startingBehavior != StartingBehavior::RestartWithAppending)
255     {
256         pleaseCiteCouplingAlgorithms(fplog, *ir);
257     }
258     gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, top_global, oenv, wcycle,
259                                          StartingBehavior::NewSimulation);
260     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, mdoutf_get_fp_dhdl(outf), false);
261
262     gstat = global_stat_init(ir);
263
264     /* Check for polarizable models and flexible constraints */
265     shellfc = init_shell_flexcon(fplog,
266                                  top_global, constr ? constr->numFlexibleConstraints() : 0,
267                                  ir->nstcalcenergy, DOMAINDECOMP(cr));
268
269     {
270         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
271         if ((io > 2000) && MASTER(cr))
272         {
273             fprintf(stderr,
274                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
275                     io);
276         }
277     }
278
279     // Local state only becomes valid now.
280     std::unique_ptr<t_state> stateInstance;
281     t_state *                state;
282
283
284     auto mdatoms = mdAtoms->mdatoms();
285
286     std::unique_ptr<UpdateConstrainCuda> integrator;
287
288     if (DOMAINDECOMP(cr))
289     {
290         dd_init_local_top(*top_global, &top);
291
292         stateInstance = std::make_unique<t_state>();
293         state         = stateInstance.get();
294         dd_init_local_state(cr->dd, state_global, state);
295
296         /* Distribute the charge groups over the nodes from the master node */
297         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
298                             state_global, *top_global, ir, imdSession,
299                             pull_work,
300                             state, &f, mdAtoms, &top, fr,
301                             vsite, constr,
302                             nrnb, nullptr, FALSE);
303         shouldCheckNumberOfBondedInteractions = true;
304         upd.setNumAtoms(state->natoms);
305     }
306     else
307     {
308         state_change_natoms(state_global, state_global->natoms);
309         f.resizeWithPadding(state_global->natoms);
310         /* Copy the pointer to the global state */
311         state = state_global;
312
313         /* Generate and initialize new topology */
314         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr,
315                                   &graph, mdAtoms, constr, vsite, shellfc);
316
317         upd.setNumAtoms(state->natoms);
318     }
319
320     if (c_useGpuUpdateConstrain)
321     {
322         GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
323         GMX_RELEASE_ASSERT(ir->etc != etcNOSEHOOVER, "Nose Hoover temperature coupling is not supported on the GPU.");
324         GMX_RELEASE_ASSERT(ir->epc == epcNO, "Pressure coupling is not supported on the GPU.");
325         GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
326         GMX_RELEASE_ASSERT(ed == nullptr, "Essential dynamics is not supported with GPU-based update constraints.");
327         GMX_LOG(mdlog.info).asParagraph().
328             appendText("Using CUDA GPU-based update and constraints module.");
329         integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global);
330         integrator->set(top.idef, *mdatoms, ekind->ngtc);
331         t_pbc pbc;
332         set_pbc(&pbc, epbcXYZ, state->box);
333         integrator->setPbc(&pbc);
334     }
335
336     if (fr->nbv->useGpu())
337     {
338         changePinningPolicy(&state->x, gmx::PinningPolicy::PinnedIfSupported);
339     }
340
341     // NOTE: The global state is no longer used at this point.
342     // But state_global is still used as temporary storage space for writing
343     // the global state to file and potentially for replica exchange.
344     // (Global topology should persist.)
345
346     update_mdatoms(mdatoms, state->lambda[efptMASS]);
347
348     if (ir->bExpanded)
349     {
350         /* Check nstexpanded here, because the grompp check was broken */
351         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
352         {
353             gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
354         }
355         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation,
356                                ir, state->dfhist);
357     }
358
359     if (MASTER(cr))
360     {
361         if (startingBehavior != StartingBehavior::NewSimulation)
362         {
363             /* Restore from energy history if appending to output files */
364             if (startingBehavior == StartingBehavior::RestartWithAppending)
365             {
366                 /* If no history is available (because a checkpoint is from before
367                  * it was written) make a new one later, otherwise restore it.
368                  */
369                 if (observablesHistory->energyHistory)
370                 {
371                     energyOutput.restoreFromEnergyHistory(*observablesHistory->energyHistory);
372                 }
373             }
374             else if (observablesHistory->energyHistory)
375             {
376                 /* We might have read an energy history from checkpoint.
377                  * As we are not appending, we want to restart the statistics.
378                  * Free the allocated memory and reset the counts.
379                  */
380                 observablesHistory->energyHistory = {};
381                 /* We might have read a pull history from checkpoint.
382                  * We will still want to keep the statistics, so that the files
383                  * can be joined and still be meaningful.
384                  * This means that observablesHistory->pullHistory
385                  * should not be reset.
386                  */
387             }
388         }
389         if (!observablesHistory->energyHistory)
390         {
391             observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
392         }
393         if (!observablesHistory->pullHistory)
394         {
395             observablesHistory->pullHistory = std::make_unique<PullHistory>();
396         }
397         /* Set the initial energy history */
398         energyOutput.fillEnergyHistory(observablesHistory->energyHistory.get());
399     }
400
401     preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
402                            startingBehavior != StartingBehavior::NewSimulation);
403
404     // TODO: Remove this by converting AWH into a ForceProvider
405     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingBehavior != StartingBehavior::NewSimulation,
406                                 shellfc != nullptr,
407                                 opt2fn("-awh", nfile, fnm), pull_work);
408
409     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
410     if (useReplicaExchange && MASTER(cr))
411     {
412         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir,
413                                         replExParams);
414     }
415     /* PME tuning is only supported in the Verlet scheme, with PME for
416      * Coulomb. It is not supported with only LJ PME. */
417     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) &&
418                 !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP);
419
420     pme_load_balancing_t *pme_loadbal      = nullptr;
421     if (bPMETune)
422     {
423         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
424                          *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu(),
425                          &bPMETunePrinting);
426     }
427
428     if (!ir->bContinuation)
429     {
430         if (state->flags & (1U << estV))
431         {
432             auto v = makeArrayRef(state->v);
433             /* Set the velocities of vsites, shells and frozen atoms to zero */
434             for (i = 0; i < mdatoms->homenr; i++)
435             {
436                 if (mdatoms->ptype[i] == eptVSite ||
437                     mdatoms->ptype[i] == eptShell)
438                 {
439                     clear_rvec(v[i]);
440                 }
441                 else if (mdatoms->cFREEZE)
442                 {
443                     for (m = 0; m < DIM; m++)
444                     {
445                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
446                         {
447                             v[i][m] = 0;
448                         }
449                     }
450                 }
451             }
452         }
453
454         if (constr)
455         {
456             /* Constrain the initial coordinates and velocities */
457             do_constrain_first(fplog, constr, ir, mdatoms, state);
458         }
459         if (vsite)
460         {
461             /* Construct the virtual sites for the initial configuration */
462             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr,
463                              top.idef.iparams, top.idef.il,
464                              fr->ePBC, fr->bMolPBC, cr, state->box);
465         }
466     }
467
468     if (ir->efep != efepNO)
469     {
470         /* Set free energy calculation frequency as the greatest common
471          * denominator of nstdhdl and repl_ex_nst. */
472         nstfep = ir->fepvals->nstdhdl;
473         if (ir->bExpanded)
474         {
475             nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
476         }
477         if (useReplicaExchange)
478         {
479             nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
480         }
481     }
482
483     /* Be REALLY careful about what flags you set here. You CANNOT assume
484      * this is the first step, since we might be restarting from a checkpoint,
485      * and in that case we should not do any modifications to the state.
486      */
487     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
488
489     // When restarting from a checkpoint, it can be appropriate to
490     // initialize ekind from quantities in the checkpoint. Otherwise,
491     // compute_globals must initialize ekind before the simulation
492     // starts/restarts. However, only the master rank knows what was
493     // found in the checkpoint file, so we have to communicate in
494     // order to coordinate the restart.
495     //
496     // TODO Consider removing this communication if/when checkpoint
497     // reading directly follows .tpr reading, because all ranks can
498     // agree on hasReadEkinState at that time.
499     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
500     if (PAR(cr))
501     {
502         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
503     }
504     if (hasReadEkinState)
505     {
506         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
507     }
508
509     unsigned int cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
510                                | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
511                                | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
512                                | (hasReadEkinState ? CGLO_READEKIN : 0));
513
514     bSumEkinhOld = FALSE;
515
516     t_vcm vcm(top_global->groups, *ir);
517     reportComRemovalInfo(fplog, vcm);
518
519     /* To minimize communication, compute_globals computes the COM velocity
520      * and the kinetic energy for the velocities without COM motion removed.
521      * Thus to get the kinetic energy without the COM contribution, we need
522      * to call compute_globals twice.
523      */
524     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
525     {
526         unsigned int cglo_flags_iteration = cglo_flags;
527         if (bStopCM && cgloIteration == 0)
528         {
529             cglo_flags_iteration |= CGLO_STOPCM;
530             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
531         }
532         compute_globals(gstat, cr, ir, fr, ekind,
533                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
534                         mdatoms, nrnb, &vcm,
535                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
536                         constr, &nullSignaller, state->box,
537                         &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration
538                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
539         if (cglo_flags_iteration & CGLO_STOPCM)
540         {
541             /* At initialization, do not pass x with acceleration-correction mode
542              * to avoid (incorrect) correction of the initial coordinates.
543              */
544             rvec *xPtr = nullptr;
545             if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
546             {
547                 xPtr = state->x.rvec_array();
548             }
549             process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
550             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
551         }
552     }
553     checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
554                                     top_global, &top, state->x.rvec_array(), state->box,
555                                     &shouldCheckNumberOfBondedInteractions);
556     if (ir->eI == eiVVAK)
557     {
558         /* a second call to get the half step temperature initialized as well */
559         /* we do the same call as above, but turn the pressure off -- internally to
560            compute_globals, this is recognized as a velocity verlet half-step
561            kinetic energy calculation.  This minimized excess variables, but
562            perhaps loses some logic?*/
563
564         compute_globals(gstat, cr, ir, fr, ekind,
565                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
566                         mdatoms, nrnb, &vcm,
567                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
568                         constr, &nullSignaller, state->box,
569                         nullptr, &bSumEkinhOld,
570                         cglo_flags & ~CGLO_PRESSURE);
571     }
572
573     /* Calculate the initial half step temperature, and save the ekinh_old */
574     if (startingBehavior == StartingBehavior::NewSimulation)
575     {
576         for (i = 0; (i < ir->opts.ngtc); i++)
577         {
578             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
579         }
580     }
581
582     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
583        temperature control */
584     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
585
586     if (MASTER(cr))
587     {
588         if (!ir->bContinuation)
589         {
590             if (constr && ir->eConstrAlg == econtLINCS)
591             {
592                 fprintf(fplog,
593                         "RMS relative constraint deviation after constraining: %.2e\n",
594                         constr->rmsd());
595             }
596             if (EI_STATE_VELOCITY(ir->eI))
597             {
598                 real temp = enerd->term[F_TEMP];
599                 if (ir->eI != eiVV)
600                 {
601                     /* Result of Ekin averaged over velocities of -half
602                      * and +half step, while we only have -half step here.
603                      */
604                     temp *= 2;
605                 }
606                 fprintf(fplog, "Initial temperature: %g K\n", temp);
607             }
608         }
609
610         char tbuf[20];
611         fprintf(stderr, "starting mdrun '%s'\n",
612                 *(top_global->name));
613         if (ir->nsteps >= 0)
614         {
615             sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
616         }
617         else
618         {
619             sprintf(tbuf, "%s", "infinite");
620         }
621         if (ir->init_step > 0)
622         {
623             fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
624                     gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
625                     gmx_step_str(ir->init_step, sbuf2),
626                     ir->init_step*ir->delta_t);
627         }
628         else
629         {
630             fprintf(stderr, "%s steps, %s ps.\n",
631                     gmx_step_str(ir->nsteps, sbuf), tbuf);
632         }
633         fprintf(fplog, "\n");
634     }
635
636     walltime_accounting_start_time(walltime_accounting);
637     wallcycle_start(wcycle, ewcRUN);
638     print_start(fplog, cr, walltime_accounting, "mdrun");
639
640 #if GMX_FAHCORE
641     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
642     int chkpt_ret = fcCheckPointParallel( cr->nodeid,
643                                           NULL, 0);
644     if (chkpt_ret == 0)
645     {
646         gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
647     }
648 #endif
649
650     /***********************************************************
651      *
652      *             Loop over MD steps
653      *
654      ************************************************************/
655
656     bFirstStep       = TRUE;
657     /* Skip the first Nose-Hoover integration when we get the state from tpx */
658     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
659     bSumEkinhOld     = FALSE;
660     bExchanged       = FALSE;
661     bNeedRepartition = FALSE;
662
663     bool simulationsShareState = false;
664     int  nstSignalComm         = nstglobalcomm;
665     {
666         // TODO This implementation of ensemble orientation restraints is nasty because
667         // a user can't just do multi-sim with single-sim orientation restraints.
668         bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
669         bool awhUsesMultiSim         = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
670
671         // Replica exchange, ensemble restraints and AWH need all
672         // simulations to remain synchronized, so they need
673         // checkpoints and stop conditions to act on the same step, so
674         // the propagation of such signals must take place between
675         // simulations, not just within simulations.
676         // TODO: Make algorithm initializers set these flags.
677         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
678
679         if (simulationsShareState)
680         {
681             // Inter-simulation signal communication does not need to happen
682             // often, so we use a minimum of 200 steps to reduce overhead.
683             const int c_minimumInterSimulationSignallingInterval = 200;
684             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm;
685         }
686     }
687
688     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
689                 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
690                 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
691                 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
692
693     auto checkpointHandler = std::make_unique<CheckpointHandler>(
694                 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
695                 simulationsShareState, ir->nstlist == 0, MASTER(cr),
696                 mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period);
697
698     const bool resetCountersIsLocal = true;
699     auto       resetHandler         = std::make_unique<ResetHandler>(
700                 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal,
701                 ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
702                 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
703
704     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
705
706     step     = ir->init_step;
707     step_rel = 0;
708
709     // TODO extract this to new multi-simulation module
710     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
711     {
712         if (!multisim_int_all_are_equal(ms, ir->nsteps))
713         {
714             GMX_LOG(mdlog.warning).appendText(
715                     "Note: The number of steps is not consistent across multi simulations,\n"
716                     "but we are proceeding anyway!");
717         }
718         if (!multisim_int_all_are_equal(ms, ir->init_step))
719         {
720             GMX_LOG(mdlog.warning).appendText(
721                     "Note: The initial step is not consistent across multi simulations,\n"
722                     "but we are proceeding anyway!");
723         }
724     }
725
726     /* and stop now if we should */
727     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
728     while (!bLastStep)
729     {
730
731         /* Determine if this is a neighbor search step */
732         bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
733
734         if (bPMETune && bNStList)
735         {
736             /* PME grid + cut-off optimization with GPUs or PME nodes */
737             pme_loadbal_do(pme_loadbal, cr,
738                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
739                            fplog, mdlog,
740                            *ir, fr, *state,
741                            wcycle,
742                            step, step_rel,
743                            &bPMETunePrinting);
744         }
745
746         wallcycle_start(wcycle, ewcSTEP);
747
748         bLastStep = (step_rel == ir->nsteps);
749         t         = t0 + step*ir->delta_t;
750
751         // TODO Refactor this, so that nstfep does not need a default value of zero
752         if (ir->efep != efepNO || ir->bSimTemp)
753         {
754             /* find and set the current lambdas */
755             setCurrentLambdasLocal(step, ir->fepvals, lam0, state);
756
757             bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
758             bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
759             bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
760                             && (ir->bExpanded) && (step > 0) &&
761                             (startingBehavior == StartingBehavior::NewSimulation));
762         }
763
764         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep &&
765                      do_per_step(step, replExParams.exchangeInterval));
766
767         if (doSimulatedAnnealing)
768         {
769             update_annealing_target_temp(ir, t, &upd);
770         }
771
772         /* Stop Center of Mass motion */
773         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
774
775         /* Determine whether or not to do Neighbour Searching */
776         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
777
778         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
779
780         /* do_log triggers energy and virial calculation. Because this leads
781          * to different code paths, forces can be different. Thus for exact
782          * continuation we should avoid extra log output.
783          * Note that the || bLastStep can result in non-exact continuation
784          * beyond the last step. But we don't consider that to be an issue.
785          */
786         do_log     =
787             (do_per_step(step, ir->nstlog) ||
788              (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) ||
789              bLastStep);
790         do_verbose = mdrunOptions.verbose &&
791             (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
792
793         if (bNS && !(bFirstStep && ir->bContinuation))
794         {
795             bMasterState = FALSE;
796             /* Correct the new box if it is too skewed */
797             if (inputrecDynamicBox(ir))
798             {
799                 if (correct_box(fplog, step, state->box, graph))
800                 {
801                     bMasterState = TRUE;
802                 }
803             }
804             if (DOMAINDECOMP(cr) && bMasterState)
805             {
806                 dd_collect_state(cr->dd, state, state_global);
807             }
808
809             if (DOMAINDECOMP(cr))
810             {
811                 /* Repartition the domain decomposition */
812                 dd_partition_system(fplog, mdlog, step, cr,
813                                     bMasterState, nstglobalcomm,
814                                     state_global, *top_global, ir, imdSession,
815                                     pull_work,
816                                     state, &f, mdAtoms, &top, fr,
817                                     vsite, constr,
818                                     nrnb, wcycle,
819                                     do_verbose && !bPMETunePrinting);
820                 shouldCheckNumberOfBondedInteractions = true;
821                 upd.setNumAtoms(state->natoms);
822             }
823         }
824
825         if (MASTER(cr) && do_log)
826         {
827             energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
828         }
829
830         if (ir->efep != efepNO)
831         {
832             update_mdatoms(mdatoms, state->lambda[efptMASS]);
833         }
834
835         if (bExchanged)
836         {
837
838             /* We need the kinetic energy at minus the half step for determining
839              * the full step kinetic energy and possibly for T-coupling.*/
840             /* This may not be quite working correctly yet . . . . */
841             compute_globals(gstat, cr, ir, fr, ekind,
842                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
843                             mdatoms, nrnb, &vcm,
844                             wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
845                             constr, &nullSignaller, state->box,
846                             &totalNumberOfBondedInteractions, &bSumEkinhOld,
847                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
848             checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
849                                             top_global, &top, state->x.rvec_array(), state->box,
850                                             &shouldCheckNumberOfBondedInteractions);
851         }
852         clear_mat(force_vir);
853
854         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
855
856         /* Determine the energy and pressure:
857          * at nstcalcenergy steps and at energy output steps (set below).
858          */
859         if (EI_VV(ir->eI) && (!bInitStep))
860         {
861             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
862             bCalcVir      = bCalcEnerStep ||
863                 (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
864         }
865         else
866         {
867             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
868             bCalcVir      = bCalcEnerStep ||
869                 (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
870         }
871         bCalcEner = bCalcEnerStep;
872
873         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
874
875         if (do_ene || do_log || bDoReplEx)
876         {
877             bCalcVir  = TRUE;
878             bCalcEner = TRUE;
879         }
880
881         /* Do we need global communication ? */
882         bGStat = (bCalcVir || bCalcEner || bStopCM ||
883                   do_per_step(step, nstglobalcomm) ||
884                   (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
885
886         force_flags = (GMX_FORCE_STATECHANGED |
887                        ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) |
888                        GMX_FORCE_ALLFORCES |
889                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
890                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
891                        (bDoFEP ? GMX_FORCE_DHDL : 0)
892                        );
893
894         if (shellfc)
895         {
896             /* Now is the time to relax the shells */
897             relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose,
898                                 enforcedRotation, step,
899                                 ir, imdSession, pull_work, bNS, force_flags, &top,
900                                 constr, enerd, fcd,
901                                 state->natoms,
902                                 state->x.arrayRefWithPadding(),
903                                 state->v.arrayRefWithPadding(),
904                                 state->box,
905                                 state->lambda,
906                                 &state->hist,
907                                 f.arrayRefWithPadding(), force_vir, mdatoms,
908                                 nrnb, wcycle, graph,
909                                 shellfc, fr, ppForceWorkload, t, mu_tot,
910                                 vsite,
911                                 ddBalanceRegionHandler);
912         }
913         else
914         {
915             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated
916                (or the AWH update will be performed twice for one step when continuing). It would be best to
917                call this update function from do_md_trajectory_writing but that would occur after do_force.
918                One would have to divide the update_awh function into one function applying the AWH force
919                and one doing the AWH bias update. The update AWH bias function could then be called after
920                do_md_trajectory_writing (then containing update_awh_history).
921                The checkpointing will in the future probably moved to the start of the md loop which will
922                rid of this issue. */
923             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
924             {
925                 awh->updateHistory(state_global->awhHistory.get());
926             }
927
928             /* The coordinates (x) are shifted (to get whole molecules)
929              * in do_force.
930              * This is parallellized as well, and does communication too.
931              * Check comments in sim_util.c
932              */
933             do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession,
934                      pull_work,
935                      step, nrnb, wcycle, &top,
936                      state->box, state->x.arrayRefWithPadding(), &state->hist,
937                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
938                      state->lambda, graph,
939                      fr, ppForceWorkload, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
940                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
941                      ddBalanceRegionHandler);
942         }
943
944         // VV integrators do not need the following velocity half step
945         // if it is the first step after starting from a checkpoint.
946         // That is, the half step is needed on all other steps, and
947         // also the first step when starting from a .tpr file.
948         if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
949         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
950         {
951             rvec *vbuf = nullptr;
952
953             wallcycle_start(wcycle, ewcUPDATE);
954             if (ir->eI == eiVV && bInitStep)
955             {
956                 /* if using velocity verlet with full time step Ekin,
957                  * take the first half step only to compute the
958                  * virial for the first step. From there,
959                  * revert back to the initial coordinates
960                  * so that the input is actually the initial step.
961                  */
962                 snew(vbuf, state->natoms);
963                 copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */
964             }
965             else
966             {
967                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
968                 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
969             }
970
971             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
972                           ekind, M, &upd, etrtVELOCITY1,
973                           cr, constr);
974
975             wallcycle_stop(wcycle, ewcUPDATE);
976             constrain_velocities(step, nullptr,
977                                  state,
978                                  shake_vir,
979                                  constr,
980                                  bCalcVir, do_log, do_ene);
981             wallcycle_start(wcycle, ewcUPDATE);
982             /* if VV, compute the pressure and constraints */
983             /* For VV2, we strictly only need this if using pressure
984              * control, but we really would like to have accurate pressures
985              * printed out.
986              * Think about ways around this in the future?
987              * For now, keep this choice in comments.
988              */
989             /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
990             /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
991             bPres = TRUE;
992             bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
993             if (bCalcEner && ir->eI == eiVVAK)
994             {
995                 bSumEkinhOld = TRUE;
996             }
997             /* for vv, the first half of the integration actually corresponds to the previous step.
998                So we need information from the last step in the first half of the integration */
999             if (bGStat || do_per_step(step-1, nstglobalcomm))
1000             {
1001                 wallcycle_stop(wcycle, ewcUPDATE);
1002                 compute_globals(gstat, cr, ir, fr, ekind,
1003                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1004                                 mdatoms, nrnb, &vcm,
1005                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1006                                 constr, &nullSignaller, state->box,
1007                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1008                                 (bGStat ? CGLO_GSTAT : 0)
1009                                 | (bCalcEner ? CGLO_ENERGY : 0)
1010                                 | (bTemp ? CGLO_TEMPERATURE : 0)
1011                                 | (bPres ? CGLO_PRESSURE : 0)
1012                                 | (bPres ? CGLO_CONSTRAINT : 0)
1013                                 | (bStopCM ? CGLO_STOPCM : 0)
1014                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1015                                 | CGLO_SCALEEKIN
1016                                 );
1017                 /* explanation of above:
1018                    a) We compute Ekin at the full time step
1019                    if 1) we are using the AveVel Ekin, and it's not the
1020                    initial step, or 2) if we are using AveEkin, but need the full
1021                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1022                    b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
1023                    EkinAveVel because it's needed for the pressure */
1024                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1025                                                 top_global, &top, state->x.rvec_array(), state->box,
1026                                                 &shouldCheckNumberOfBondedInteractions);
1027                 if (bStopCM)
1028                 {
1029                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1030                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1031                 }
1032                 wallcycle_start(wcycle, ewcUPDATE);
1033             }
1034             /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1035             if (!bInitStep)
1036             {
1037                 if (bTrotter)
1038                 {
1039                     m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
1040                     trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
1041
1042                     /* TODO This is only needed when we're about to write
1043                      * a checkpoint, because we use it after the restart
1044                      * (in a kludge?). But what should we be doing if
1045                      * the startingBehavior is NewSimulation or bInitStep are true? */
1046                     if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1047                     {
1048                         copy_mat(shake_vir, state->svir_prev);
1049                         copy_mat(force_vir, state->fvir_prev);
1050                     }
1051                     if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1052                     {
1053                         /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1054                         enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1055                         enerd->term[F_EKIN] = trace(ekind->ekin);
1056                     }
1057                 }
1058                 else if (bExchanged)
1059                 {
1060                     wallcycle_stop(wcycle, ewcUPDATE);
1061                     /* We need the kinetic energy at minus the half step for determining
1062                      * the full step kinetic energy and possibly for T-coupling.*/
1063                     /* This may not be quite working correctly yet . . . . */
1064                     compute_globals(gstat, cr, ir, fr, ekind,
1065                                     state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1066                                     mdatoms, nrnb, &vcm,
1067                                     wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
1068                                     constr, &nullSignaller, state->box,
1069                                     nullptr, &bSumEkinhOld,
1070                                     CGLO_GSTAT | CGLO_TEMPERATURE);
1071                     wallcycle_start(wcycle, ewcUPDATE);
1072                 }
1073             }
1074             /* if it's the initial step, we performed this first step just to get the constraint virial */
1075             if (ir->eI == eiVV && bInitStep)
1076             {
1077                 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1078                 sfree(vbuf);
1079             }
1080             wallcycle_stop(wcycle, ewcUPDATE);
1081         }
1082
1083         /* compute the conserved quantity */
1084         if (EI_VV(ir->eI))
1085         {
1086             saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1087             if (ir->eI == eiVV)
1088             {
1089                 last_ekin = enerd->term[F_EKIN];
1090             }
1091             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1092             {
1093                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1094             }
1095             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1096             if (ir->efep != efepNO)
1097             {
1098                 sum_dhdl(enerd, state->lambda, ir->fepvals);
1099             }
1100         }
1101
1102         /* ########  END FIRST UPDATE STEP  ############## */
1103         /* ########  If doing VV, we now have v(dt) ###### */
1104         if (bDoExpanded)
1105         {
1106             /* perform extended ensemble sampling in lambda - we don't
1107                actually move to the new state before outputting
1108                statistics, but if performing simulated tempering, we
1109                do update the velocities and the tau_t. */
1110
1111             lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms);
1112             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1113             if (MASTER(cr))
1114             {
1115                 copy_df_history(state_global->dfhist, state->dfhist);
1116             }
1117         }
1118
1119         /* Now we have the energies and forces corresponding to the
1120          * coordinates at time t. We must output all of this before
1121          * the update.
1122          */
1123         do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
1124                                  ir, state, state_global, observablesHistory,
1125                                  top_global, fr,
1126                                  outf, energyOutput, ekind, f,
1127                                  checkpointHandler->isCheckpointingStep(),
1128                                  bRerunMD, bLastStep,
1129                                  mdrunOptions.writeConfout,
1130                                  bSumEkinhOld);
1131         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1132         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1133
1134         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1135         if (startingBehavior != StartingBehavior::NewSimulation &&
1136             (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1137         {
1138             copy_mat(state->svir_prev, shake_vir);
1139             copy_mat(state->fvir_prev, force_vir);
1140         }
1141
1142         stopHandler->setSignal();
1143         resetHandler->setSignal(walltime_accounting);
1144
1145         if (bGStat || !PAR(cr))
1146         {
1147             /* In parallel we only have to check for checkpointing in steps
1148              * where we do global communication,
1149              *  otherwise the other nodes don't know.
1150              */
1151             checkpointHandler->setSignal(walltime_accounting);
1152         }
1153
1154         /* #########   START SECOND UPDATE STEP ################# */
1155
1156         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
1157            in preprocessing */
1158
1159         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1160         {
1161             gmx_bool bIfRandomize;
1162             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1163             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1164             if (constr && bIfRandomize)
1165             {
1166                 constrain_velocities(step, nullptr,
1167                                      state,
1168                                      tmp_vir,
1169                                      constr,
1170                                      bCalcVir, do_log, do_ene);
1171             }
1172         }
1173         /* Box is changed in update() when we do pressure coupling,
1174          * but we should still use the old box for energy corrections and when
1175          * writing it to the energy file, so it matches the trajectory files for
1176          * the same timestep above. Make a copy in a separate array.
1177          */
1178         copy_mat(state->box, lastbox);
1179
1180         dvdl_constr = 0;
1181
1182         wallcycle_start(wcycle, ewcUPDATE);
1183         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1184         if (bTrotter)
1185         {
1186             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1187             /* We can only do Berendsen coupling after we have summed
1188              * the kinetic energy or virial. Since the happens
1189              * in global_state after update, we should only do it at
1190              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1191              */
1192         }
1193         else
1194         {
1195             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1196             update_pcouple_before_coordinates(fplog, step, ir, state,
1197                                               parrinellorahmanMu, M,
1198                                               bInitStep);
1199         }
1200
1201         if (EI_VV(ir->eI))
1202         {
1203             /* velocity half-step update */
1204             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1205                           ekind, M, &upd, etrtVELOCITY2,
1206                           cr, constr);
1207         }
1208
1209         /* Above, initialize just copies ekinh into ekin,
1210          * it doesn't copy position (for VV),
1211          * and entire integrator for MD.
1212          */
1213
1214         if (ir->eI == eiVVAK)
1215         {
1216             cbuf.resize(state->x.size());
1217             std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1218         }
1219
1220         if (c_useGpuUpdateConstrain)
1221         {
1222             if (bNS)
1223             {
1224                 integrator->set(top.idef, *mdatoms, ekind->ngtc);
1225                 t_pbc pbc;
1226                 set_pbc(&pbc, epbcXYZ, state->box);
1227                 integrator->setPbc(&pbc);
1228             }
1229             integrator->copyCoordinatesToGpu(state->x.rvec_array());
1230             integrator->copyVelocitiesToGpu(state->v.rvec_array());
1231             integrator->copyForcesToGpu(as_rvec_array(f.data()));
1232
1233             // This applies Leap-Frog, LINCS and SETTLE in a succession
1234             bool doTempCouple = (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1235
1236             integrator->integrate(ir->delta_t, true, bCalcVir, shake_vir, doTempCouple, ekind->tcstat);
1237
1238             integrator->copyCoordinatesFromGpu(state->x.rvec_array());
1239             integrator->copyVelocitiesFromGpu(state->v.rvec_array());
1240         }
1241         else
1242         {
1243             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1244                           ekind, M, &upd, etrtPOSITION, cr, constr);
1245
1246             wallcycle_stop(wcycle, ewcUPDATE);
1247
1248             constrain_coordinates(step, &dvdl_constr, state,
1249                                   shake_vir,
1250                                   &upd, constr,
1251                                   bCalcVir, do_log, do_ene);
1252
1253             update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
1254                                   cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
1255             finish_update(ir, mdatoms,
1256                           state, graph,
1257                           nrnb, wcycle, &upd, constr);
1258         }
1259
1260         if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1261         {
1262             updatePrevStepPullCom(pull_work, state);
1263         }
1264
1265         if (ir->eI == eiVVAK)
1266         {
1267             /* erase F_EKIN and F_TEMP here? */
1268             /* just compute the kinetic energy at the half step to perform a trotter step */
1269             compute_globals(gstat, cr, ir, fr, ekind,
1270                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1271                             mdatoms, nrnb, &vcm,
1272                             wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1273                             constr, &nullSignaller, lastbox,
1274                             nullptr, &bSumEkinhOld,
1275                             (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
1276                             );
1277             wallcycle_start(wcycle, ewcUPDATE);
1278             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1279             /* now we know the scaling, we can compute the positions again */
1280             std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1281
1282             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1283                           ekind, M, &upd, etrtPOSITION, cr, constr);
1284             wallcycle_stop(wcycle, ewcUPDATE);
1285
1286             /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1287             /* are the small terms in the shake_vir here due
1288              * to numerical errors, or are they important
1289              * physically? I'm thinking they are just errors, but not completely sure.
1290              * For now, will call without actually constraining, constr=NULL*/
1291             finish_update(ir, mdatoms,
1292                           state, graph,
1293                           nrnb, wcycle, &upd, nullptr);
1294         }
1295         if (EI_VV(ir->eI))
1296         {
1297             /* this factor or 2 correction is necessary
1298                because half of the constraint force is removed
1299                in the vv step, so we have to double it.  See
1300                the Redmine issue #1255.  It is not yet clear
1301                if the factor of 2 is exact, or just a very
1302                good approximation, and this will be
1303                investigated.  The next step is to see if this
1304                can be done adding a dhdl contribution from the
1305                rattle step, but this is somewhat more
1306                complicated with the current code. Will be
1307                investigated, hopefully for 4.6.3. However,
1308                this current solution is much better than
1309                having it completely wrong.
1310              */
1311             enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
1312         }
1313         else
1314         {
1315             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1316         }
1317
1318         if (vsite != nullptr)
1319         {
1320             wallcycle_start(wcycle, ewcVSITECONSTR);
1321             if (graph != nullptr)
1322             {
1323                 shift_self(graph, state->box, state->x.rvec_array());
1324             }
1325             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1326                              top.idef.iparams, top.idef.il,
1327                              fr->ePBC, fr->bMolPBC, cr, state->box);
1328
1329             if (graph != nullptr)
1330             {
1331                 unshift_self(graph, state->box, state->x.rvec_array());
1332             }
1333             wallcycle_stop(wcycle, ewcVSITECONSTR);
1334         }
1335
1336         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1337         /* With Leap-Frog we can skip compute_globals at
1338          * non-communication steps, but we need to calculate
1339          * the kinetic energy one step before communication.
1340          */
1341         {
1342             // Organize to do inter-simulation signalling on steps if
1343             // and when algorithms require it.
1344             bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1345
1346             if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
1347             {
1348                 // Since we're already communicating at this step, we
1349                 // can propagate intra-simulation signals. Note that
1350                 // check_nstglobalcomm has the responsibility for
1351                 // choosing the value of nstglobalcomm that is one way
1352                 // bGStat becomes true, so we can't get into a
1353                 // situation where e.g. checkpointing can't be
1354                 // signalled.
1355                 bool                doIntraSimSignal = true;
1356                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1357
1358                 compute_globals(gstat, cr, ir, fr, ekind,
1359                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1360                                 mdatoms, nrnb, &vcm,
1361                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1362                                 constr, &signaller,
1363                                 lastbox,
1364                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1365                                 (bGStat ? CGLO_GSTAT : 0)
1366                                 | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1367                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1368                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1369                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
1370                                 | CGLO_CONSTRAINT
1371                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1372                                 );
1373                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1374                                                 top_global, &top, state->x.rvec_array(), state->box,
1375                                                 &shouldCheckNumberOfBondedInteractions);
1376                 if (!EI_VV(ir->eI) && bStopCM)
1377                 {
1378                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1379                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1380                 }
1381             }
1382         }
1383
1384         /* #############  END CALC EKIN AND PRESSURE ################# */
1385
1386         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1387            the virial that should probably be addressed eventually. state->veta has better properies,
1388            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1389            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1390
1391         if (ir->efep != efepNO && !EI_VV(ir->eI))
1392         {
1393             /* Sum up the foreign energy and dhdl terms for md and sd.
1394                Currently done every step so that dhdl is correct in the .edr */
1395             sum_dhdl(enerd, state->lambda, ir->fepvals);
1396         }
1397
1398         update_pcouple_after_coordinates(fplog, step, ir, mdatoms,
1399                                          pres, force_vir, shake_vir,
1400                                          parrinellorahmanMu,
1401                                          state, nrnb, &upd);
1402
1403         /* ################# END UPDATE STEP 2 ################# */
1404         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1405
1406         /* The coordinates (x) were unshifted in update */
1407         if (!bGStat)
1408         {
1409             /* We will not sum ekinh_old,
1410              * so signal that we still have to do it.
1411              */
1412             bSumEkinhOld = TRUE;
1413         }
1414
1415         if (bCalcEner)
1416         {
1417             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1418
1419             /* use the directly determined last velocity, not actually the averaged half steps */
1420             if (bTrotter && ir->eI == eiVV)
1421             {
1422                 enerd->term[F_EKIN] = last_ekin;
1423             }
1424             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1425
1426             if (integratorHasConservedEnergyQuantity(ir))
1427             {
1428                 if (EI_VV(ir->eI))
1429                 {
1430                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1431                 }
1432                 else
1433                 {
1434                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1435                 }
1436             }
1437             /* #########  END PREPARING EDR OUTPUT  ###########  */
1438         }
1439
1440         /* Output stuff */
1441         if (MASTER(cr))
1442         {
1443             if (fplog && do_log && bDoExpanded)
1444             {
1445                 /* only needed if doing expanded ensemble */
1446                 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr,
1447                                           state_global->dfhist, state->fep_state, ir->nstlog, step);
1448             }
1449             if (bCalcEner)
1450             {
1451                 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep,
1452                                                  t, mdatoms->tmass, enerd, state,
1453                                                  ir->fepvals, ir->expandedvals, lastbox,
1454                                                  shake_vir, force_vir, total_vir, pres,
1455                                                  ekind, mu_tot, constr);
1456             }
1457             else
1458             {
1459                 energyOutput.recordNonEnergyStep();
1460             }
1461
1462             gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
1463             gmx_bool do_or  = do_per_step(step, ir->nstorireout);
1464
1465             energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1466             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1467                                                do_log ? fplog : nullptr,
1468                                                step, t,
1469                                                fcd, awh.get());
1470
1471             if (ir->bPull)
1472             {
1473                 pull_print_output(pull_work, step, t);
1474             }
1475
1476             if (do_per_step(step, ir->nstlog))
1477             {
1478                 if (fflush(fplog) != 0)
1479                 {
1480                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1481                 }
1482             }
1483         }
1484         if (bDoExpanded)
1485         {
1486             /* Have to do this part _after_ outputting the logfile and the edr file */
1487             /* Gets written into the state at the beginning of next loop*/
1488             state->fep_state = lamnew;
1489         }
1490         /* Print the remaining wall clock time for the run */
1491         if (isMasterSimMasterRank(ms, MASTER(cr)) &&
1492             (do_verbose || gmx_got_usr_signal()) &&
1493             !bPMETunePrinting)
1494         {
1495             if (shellfc)
1496             {
1497                 fprintf(stderr, "\n");
1498             }
1499             print_time(stderr, walltime_accounting, step, ir, cr);
1500         }
1501
1502         /* Ion/water position swapping.
1503          * Not done in last step since trajectory writing happens before this call
1504          * in the MD loop and exchanges would be lost anyway. */
1505         bNeedRepartition = FALSE;
1506         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
1507             do_per_step(step, ir->swap->nstswap))
1508         {
1509             bNeedRepartition = do_swapcoords(cr, step, t, ir, swap, wcycle,
1510                                              as_rvec_array(state->x.data()),
1511                                              state->box,
1512                                              MASTER(cr) && mdrunOptions.verbose,
1513                                              bRerunMD);
1514
1515             if (bNeedRepartition && DOMAINDECOMP(cr))
1516             {
1517                 dd_collect_state(cr->dd, state, state_global);
1518             }
1519         }
1520
1521         /* Replica exchange */
1522         bExchanged = FALSE;
1523         if (bDoReplEx)
1524         {
1525             bExchanged = replica_exchange(fplog, cr, ms, repl_ex,
1526                                           state_global, enerd,
1527                                           state, step, t);
1528         }
1529
1530         if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
1531         {
1532             dd_partition_system(fplog, mdlog, step, cr, TRUE, 1,
1533                                 state_global, *top_global, ir, imdSession,
1534                                 pull_work,
1535                                 state, &f, mdAtoms, &top, fr,
1536                                 vsite, constr,
1537                                 nrnb, wcycle, FALSE);
1538             shouldCheckNumberOfBondedInteractions = true;
1539             upd.setNumAtoms(state->natoms);
1540         }
1541
1542         bFirstStep             = FALSE;
1543         bInitStep              = FALSE;
1544
1545         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1546         /* With all integrators, except VV, we need to retain the pressure
1547          * at the current step for coupling at the next step.
1548          */
1549         if ((state->flags & (1U<<estPRES_PREV)) &&
1550             (bGStatEveryStep ||
1551              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1552         {
1553             /* Store the pressure in t_state for pressure coupling
1554              * at the next MD step.
1555              */
1556             copy_mat(pres, state->pres_prev);
1557         }
1558
1559         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1560
1561         if ( (membed != nullptr) && (!bLastStep) )
1562         {
1563             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1564         }
1565
1566         cycles = wallcycle_stop(wcycle, ewcSTEP);
1567         if (DOMAINDECOMP(cr) && wcycle)
1568         {
1569             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1570         }
1571
1572         /* increase the MD step number */
1573         step++;
1574         step_rel++;
1575
1576         resetHandler->resetCounters(
1577                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(),
1578                 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1579
1580         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1581         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1582
1583     }
1584     /* End of main MD loop */
1585
1586     /* Closing TNG files can include compressing data. Therefore it is good to do that
1587      * before stopping the time measurements. */
1588     mdoutf_tng_close(outf);
1589
1590     /* Stop measuring walltime */
1591     walltime_accounting_end_time(walltime_accounting);
1592
1593     if (!thisRankHasDuty(cr, DUTY_PME))
1594     {
1595         /* Tell the PME only node to finish */
1596         gmx_pme_send_finish(cr);
1597     }
1598
1599     if (MASTER(cr))
1600     {
1601         if (ir->nstcalcenergy > 0)
1602         {
1603             energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1604             energyOutput.printAverages(fplog, groups);
1605         }
1606     }
1607     done_mdoutf(outf);
1608
1609     if (bPMETune)
1610     {
1611         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1612     }
1613
1614     done_shellfc(fplog, shellfc, step_rel);
1615
1616     if (useReplicaExchange && MASTER(cr))
1617     {
1618         print_replica_exchange_statistics(fplog, repl_ex);
1619     }
1620
1621     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1622
1623     global_stat_destroy(gstat);
1624
1625 }