18dc39041dc83743567e2a93e8a0d559e4fe2b82
[alexxy/gromacs.git] / src / gromacs / mdrun / md.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the integrator for normal molecular dynamics simulations
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53
54 #include "gromacs/awh/awh.h"
55 #include "gromacs/commandline/filenm.h"
56 #include "gromacs/domdec/collect.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec.h"
59 #include "gromacs/domdec/domdec_network.h"
60 #include "gromacs/domdec/domdec_struct.h"
61 #include "gromacs/domdec/mdsetup.h"
62 #include "gromacs/domdec/partition.h"
63 #include "gromacs/essentialdynamics/edsam.h"
64 #include "gromacs/ewald/pme.h"
65 #include "gromacs/ewald/pme_load_balancing.h"
66 #include "gromacs/fileio/trxio.h"
67 #include "gromacs/gmxlib/network.h"
68 #include "gromacs/gmxlib/nrnb.h"
69 #include "gromacs/gpu_utils/gpu_utils.h"
70 #include "gromacs/imd/imd.h"
71 #include "gromacs/listed_forces/manage_threading.h"
72 #include "gromacs/math/functions.h"
73 #include "gromacs/math/utilities.h"
74 #include "gromacs/math/vec.h"
75 #include "gromacs/math/vectypes.h"
76 #include "gromacs/mdlib/checkpointhandler.h"
77 #include "gromacs/mdlib/compute_io.h"
78 #include "gromacs/mdlib/constr.h"
79 #include "gromacs/mdlib/ebin.h"
80 #include "gromacs/mdlib/enerdata_utils.h"
81 #include "gromacs/mdlib/energyoutput.h"
82 #include "gromacs/mdlib/expanded.h"
83 #include "gromacs/mdlib/force.h"
84 #include "gromacs/mdlib/force_flags.h"
85 #include "gromacs/mdlib/forcerec.h"
86 #include "gromacs/mdlib/md_support.h"
87 #include "gromacs/mdlib/mdatoms.h"
88 #include "gromacs/mdlib/mdoutf.h"
89 #include "gromacs/mdlib/membed.h"
90 #include "gromacs/mdlib/resethandler.h"
91 #include "gromacs/mdlib/sighandler.h"
92 #include "gromacs/mdlib/simulationsignal.h"
93 #include "gromacs/mdlib/stat.h"
94 #include "gromacs/mdlib/stophandler.h"
95 #include "gromacs/mdlib/tgroup.h"
96 #include "gromacs/mdlib/trajectory_writing.h"
97 #include "gromacs/mdlib/update.h"
98 #include "gromacs/mdlib/update_constrain_cuda.h"
99 #include "gromacs/mdlib/vcm.h"
100 #include "gromacs/mdlib/vsite.h"
101 #include "gromacs/mdrunutility/handlerestart.h"
102 #include "gromacs/mdrunutility/multisim.h"
103 #include "gromacs/mdrunutility/printtime.h"
104 #include "gromacs/mdtypes/awh_history.h"
105 #include "gromacs/mdtypes/awh_params.h"
106 #include "gromacs/mdtypes/commrec.h"
107 #include "gromacs/mdtypes/df_history.h"
108 #include "gromacs/mdtypes/energyhistory.h"
109 #include "gromacs/mdtypes/fcdata.h"
110 #include "gromacs/mdtypes/forcerec.h"
111 #include "gromacs/mdtypes/group.h"
112 #include "gromacs/mdtypes/inputrec.h"
113 #include "gromacs/mdtypes/interaction_const.h"
114 #include "gromacs/mdtypes/md_enums.h"
115 #include "gromacs/mdtypes/mdatom.h"
116 #include "gromacs/mdtypes/mdrunoptions.h"
117 #include "gromacs/mdtypes/observableshistory.h"
118 #include "gromacs/mdtypes/pullhistory.h"
119 #include "gromacs/mdtypes/state.h"
120 #include "gromacs/nbnxm/nbnxm.h"
121 #include "gromacs/pbcutil/mshift.h"
122 #include "gromacs/pbcutil/pbc.h"
123 #include "gromacs/pulling/output.h"
124 #include "gromacs/pulling/pull.h"
125 #include "gromacs/swap/swapcoords.h"
126 #include "gromacs/timing/wallcycle.h"
127 #include "gromacs/timing/walltime_accounting.h"
128 #include "gromacs/topology/atoms.h"
129 #include "gromacs/topology/idef.h"
130 #include "gromacs/topology/mtop_util.h"
131 #include "gromacs/topology/topology.h"
132 #include "gromacs/trajectory/trajectoryframe.h"
133 #include "gromacs/utility/basedefinitions.h"
134 #include "gromacs/utility/cstringutil.h"
135 #include "gromacs/utility/fatalerror.h"
136 #include "gromacs/utility/logger.h"
137 #include "gromacs/utility/real.h"
138 #include "gromacs/utility/smalloc.h"
139
140 #include "legacysimulator.h"
141 #include "replicaexchange.h"
142 #include "shellfc.h"
143
144 #if GMX_FAHCORE
145 #include "corewrap.h"
146 #endif
147
148 using gmx::SimulationSignaller;
149
150 //! Whether the GPU versions of Leap-Frog integrator and LINCS and SETTLE constraints
151 static const bool c_useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
152
153 void gmx::LegacySimulator::do_md()
154 {
155     // TODO Historically, the EM and MD "integrators" used different
156     // names for the t_inputrec *parameter, but these must have the
157     // same name, now that it's a member of a struct. We use this ir
158     // alias to avoid a large ripple of nearly useless changes.
159     // t_inputrec is being replaced by IMdpOptionsProvider, so this
160     // will go away eventually.
161     t_inputrec             *ir   = inputrec;
162     int64_t                 step, step_rel;
163     double                  t, t0 = ir->init_t, lam0[efptNR];
164     gmx_bool                bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
165     gmx_bool                bNS, bNStList, bStopCM,
166                             bFirstStep, bInitStep, bLastStep = FALSE;
167     gmx_bool                bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
168     gmx_bool                do_ene, do_log, do_verbose;
169     gmx_bool                bMasterState;
170     unsigned int            force_flags;
171     tensor                  force_vir = {{0}}, shake_vir = {{0}}, total_vir = {{0}},
172                             tmp_vir   = {{0}}, pres = {{0}};
173     int                     i, m;
174     rvec                    mu_tot;
175     matrix                  parrinellorahmanMu, M;
176     gmx_repl_ex_t           repl_ex = nullptr;
177     gmx_localtop_t          top;
178     PaddedVector<gmx::RVec> f {};
179     gmx_global_stat_t       gstat;
180     t_graph                *graph = nullptr;
181     gmx_shellfc_t          *shellfc;
182     gmx_bool                bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
183     gmx_bool                bTemp, bPres, bTrotter;
184     real                    dvdl_constr;
185     std::vector<RVec>       cbuf;
186     matrix                  lastbox;
187     int                     lamnew  = 0;
188     /* for FEP */
189     int                     nstfep = 0;
190     double                  cycles;
191     real                    saved_conserved_quantity = 0;
192     real                    last_ekin                = 0;
193     t_extmass               MassQ;
194     char                    sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
195
196     /* PME load balancing data for GPU kernels */
197     gmx_bool              bPMETune         = FALSE;
198     gmx_bool              bPMETunePrinting = FALSE;
199
200     bool                  bInteractiveMDstep = false;
201
202     /* Domain decomposition could incorrectly miss a bonded
203        interaction, but checking for that requires a global
204        communication stage, which does not otherwise happen in DD
205        code. So we do that alongside the first global energy reduction
206        after a new DD is made. These variables handle whether the
207        check happens, and the result it returns. */
208     bool              shouldCheckNumberOfBondedInteractions = false;
209     int               totalNumberOfBondedInteractions       = -1;
210
211     SimulationSignals signals;
212     // Most global communnication stages don't propagate mdrun
213     // signals, and will use this object to achieve that.
214     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
215
216     if (!mdrunOptions.writeConfout)
217     {
218         // This is on by default, and the main known use case for
219         // turning it off is for convenience in benchmarking, which is
220         // something that should not show up in the general user
221         // interface.
222         GMX_LOG(mdlog.info).asParagraph().
223             appendText("The -noconfout functionality is deprecated, and may be removed in a future version.");
224     }
225
226     /* md-vv uses averaged full step velocities for T-control
227        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
228        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
229     bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
230
231     const bool bRerunMD      = false;
232
233     int        nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
234     bGStatEveryStep = (nstglobalcomm == 1);
235
236     SimulationGroups                  *groups = &top_global->groups;
237
238     std::unique_ptr<EssentialDynamics> ed = nullptr;
239     if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr)
240     {
241         /* Initialize essential dynamics sampling */
242         ed = init_edsam(mdlog,
243                         opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm),
244                         top_global,
245                         ir, cr, constr,
246                         state_global, observablesHistory,
247                         oenv,
248                         startingBehavior);
249     }
250
251     initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
252     Update upd(ir, deform);
253     bool   doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
254     if (startingBehavior != StartingBehavior::RestartWithAppending)
255     {
256         pleaseCiteCouplingAlgorithms(fplog, *ir);
257     }
258     gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir, top_global, oenv, wcycle,
259                                          StartingBehavior::NewSimulation);
260     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, mdoutf_get_fp_dhdl(outf), false);
261
262     gstat = global_stat_init(ir);
263
264     /* Check for polarizable models and flexible constraints */
265     shellfc = init_shell_flexcon(fplog,
266                                  top_global, constr ? constr->numFlexibleConstraints() : 0,
267                                  ir->nstcalcenergy, DOMAINDECOMP(cr));
268
269     {
270         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
271         if ((io > 2000) && MASTER(cr))
272         {
273             fprintf(stderr,
274                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
275                     io);
276         }
277     }
278
279     // Local state only becomes valid now.
280     std::unique_ptr<t_state> stateInstance;
281     t_state *                state;
282
283
284     auto mdatoms = mdAtoms->mdatoms();
285
286     std::unique_ptr<UpdateConstrainCuda> integrator;
287
288     if (DOMAINDECOMP(cr))
289     {
290         dd_init_local_top(*top_global, &top);
291
292         stateInstance = std::make_unique<t_state>();
293         state         = stateInstance.get();
294         dd_init_local_state(cr->dd, state_global, state);
295
296         /* Distribute the charge groups over the nodes from the master node */
297         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
298                             state_global, *top_global, ir, imdSession,
299                             pull_work,
300                             state, &f, mdAtoms, &top, fr,
301                             vsite, constr,
302                             nrnb, nullptr, FALSE);
303         shouldCheckNumberOfBondedInteractions = true;
304         upd.setNumAtoms(state->natoms);
305     }
306     else
307     {
308         state_change_natoms(state_global, state_global->natoms);
309         f.resizeWithPadding(state_global->natoms);
310         /* Copy the pointer to the global state */
311         state = state_global;
312
313         /* Generate and initialize new topology */
314         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr,
315                                   &graph, mdAtoms, constr, vsite, shellfc);
316
317         upd.setNumAtoms(state->natoms);
318     }
319
320     if (c_useGpuUpdateConstrain)
321     {
322         GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
323         GMX_RELEASE_ASSERT(ir->etc != etcNOSEHOOVER, "Nose Hoover temperature coupling is not supported on the GPU.");
324         GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN, "Only Parrinello Rahman pressure control is supported on the GPU.");
325         GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
326         GMX_RELEASE_ASSERT(ed == nullptr, "Essential dynamics is not supported with GPU-based update constraints.");
327         GMX_LOG(mdlog.info).asParagraph().
328             appendText("Using CUDA GPU-based update and constraints module.");
329         integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global);
330         integrator->set(top.idef, *mdatoms, ekind->ngtc);
331         t_pbc pbc;
332         set_pbc(&pbc, epbcXYZ, state->box);
333         integrator->setPbc(&pbc);
334     }
335
336     if (fr->nbv->useGpu())
337     {
338         changePinningPolicy(&state->x, gmx::PinningPolicy::PinnedIfSupported);
339     }
340
341     // NOTE: The global state is no longer used at this point.
342     // But state_global is still used as temporary storage space for writing
343     // the global state to file and potentially for replica exchange.
344     // (Global topology should persist.)
345
346     update_mdatoms(mdatoms, state->lambda[efptMASS]);
347
348     if (ir->bExpanded)
349     {
350         /* Check nstexpanded here, because the grompp check was broken */
351         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
352         {
353             gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
354         }
355         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation,
356                                ir, state->dfhist);
357     }
358
359     if (MASTER(cr))
360     {
361         if (startingBehavior != StartingBehavior::NewSimulation)
362         {
363             /* Restore from energy history if appending to output files */
364             if (startingBehavior == StartingBehavior::RestartWithAppending)
365             {
366                 /* If no history is available (because a checkpoint is from before
367                  * it was written) make a new one later, otherwise restore it.
368                  */
369                 if (observablesHistory->energyHistory)
370                 {
371                     energyOutput.restoreFromEnergyHistory(*observablesHistory->energyHistory);
372                 }
373             }
374             else if (observablesHistory->energyHistory)
375             {
376                 /* We might have read an energy history from checkpoint.
377                  * As we are not appending, we want to restart the statistics.
378                  * Free the allocated memory and reset the counts.
379                  */
380                 observablesHistory->energyHistory = {};
381                 /* We might have read a pull history from checkpoint.
382                  * We will still want to keep the statistics, so that the files
383                  * can be joined and still be meaningful.
384                  * This means that observablesHistory->pullHistory
385                  * should not be reset.
386                  */
387             }
388         }
389         if (!observablesHistory->energyHistory)
390         {
391             observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
392         }
393         if (!observablesHistory->pullHistory)
394         {
395             observablesHistory->pullHistory = std::make_unique<PullHistory>();
396         }
397         /* Set the initial energy history */
398         energyOutput.fillEnergyHistory(observablesHistory->energyHistory.get());
399     }
400
401     preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
402                            startingBehavior != StartingBehavior::NewSimulation);
403
404     // TODO: Remove this by converting AWH into a ForceProvider
405     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingBehavior != StartingBehavior::NewSimulation,
406                                 shellfc != nullptr,
407                                 opt2fn("-awh", nfile, fnm), pull_work);
408
409     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
410     if (useReplicaExchange && MASTER(cr))
411     {
412         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir,
413                                         replExParams);
414     }
415     /* PME tuning is only supported in the Verlet scheme, with PME for
416      * Coulomb. It is not supported with only LJ PME. */
417     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) &&
418                 !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP);
419
420     pme_load_balancing_t *pme_loadbal      = nullptr;
421     if (bPMETune)
422     {
423         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
424                          *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu(),
425                          &bPMETunePrinting);
426     }
427
428     if (!ir->bContinuation)
429     {
430         if (state->flags & (1U << estV))
431         {
432             auto v = makeArrayRef(state->v);
433             /* Set the velocities of vsites, shells and frozen atoms to zero */
434             for (i = 0; i < mdatoms->homenr; i++)
435             {
436                 if (mdatoms->ptype[i] == eptVSite ||
437                     mdatoms->ptype[i] == eptShell)
438                 {
439                     clear_rvec(v[i]);
440                 }
441                 else if (mdatoms->cFREEZE)
442                 {
443                     for (m = 0; m < DIM; m++)
444                     {
445                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
446                         {
447                             v[i][m] = 0;
448                         }
449                     }
450                 }
451             }
452         }
453
454         if (constr)
455         {
456             /* Constrain the initial coordinates and velocities */
457             do_constrain_first(fplog, constr, ir, mdatoms, state);
458         }
459         if (vsite)
460         {
461             /* Construct the virtual sites for the initial configuration */
462             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr,
463                              top.idef.iparams, top.idef.il,
464                              fr->ePBC, fr->bMolPBC, cr, state->box);
465         }
466     }
467
468     if (ir->efep != efepNO)
469     {
470         /* Set free energy calculation frequency as the greatest common
471          * denominator of nstdhdl and repl_ex_nst. */
472         nstfep = ir->fepvals->nstdhdl;
473         if (ir->bExpanded)
474         {
475             nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
476         }
477         if (useReplicaExchange)
478         {
479             nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
480         }
481     }
482
483     /* Be REALLY careful about what flags you set here. You CANNOT assume
484      * this is the first step, since we might be restarting from a checkpoint,
485      * and in that case we should not do any modifications to the state.
486      */
487     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
488
489     // When restarting from a checkpoint, it can be appropriate to
490     // initialize ekind from quantities in the checkpoint. Otherwise,
491     // compute_globals must initialize ekind before the simulation
492     // starts/restarts. However, only the master rank knows what was
493     // found in the checkpoint file, so we have to communicate in
494     // order to coordinate the restart.
495     //
496     // TODO Consider removing this communication if/when checkpoint
497     // reading directly follows .tpr reading, because all ranks can
498     // agree on hasReadEkinState at that time.
499     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
500     if (PAR(cr))
501     {
502         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
503     }
504     if (hasReadEkinState)
505     {
506         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
507     }
508
509     unsigned int cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
510                                | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
511                                | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
512                                | (hasReadEkinState ? CGLO_READEKIN : 0));
513
514     bSumEkinhOld = FALSE;
515
516     t_vcm vcm(top_global->groups, *ir);
517     reportComRemovalInfo(fplog, vcm);
518
519     /* To minimize communication, compute_globals computes the COM velocity
520      * and the kinetic energy for the velocities without COM motion removed.
521      * Thus to get the kinetic energy without the COM contribution, we need
522      * to call compute_globals twice.
523      */
524     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
525     {
526         unsigned int cglo_flags_iteration = cglo_flags;
527         if (bStopCM && cgloIteration == 0)
528         {
529             cglo_flags_iteration |= CGLO_STOPCM;
530             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
531         }
532         compute_globals(gstat, cr, ir, fr, ekind,
533                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
534                         mdatoms, nrnb, &vcm,
535                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
536                         constr, &nullSignaller, state->box,
537                         &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration
538                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
539         if (cglo_flags_iteration & CGLO_STOPCM)
540         {
541             /* At initialization, do not pass x with acceleration-correction mode
542              * to avoid (incorrect) correction of the initial coordinates.
543              */
544             rvec *xPtr = nullptr;
545             if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
546             {
547                 xPtr = state->x.rvec_array();
548             }
549             process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
550             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
551         }
552     }
553     checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
554                                     top_global, &top, state->x.rvec_array(), state->box,
555                                     &shouldCheckNumberOfBondedInteractions);
556     if (ir->eI == eiVVAK)
557     {
558         /* a second call to get the half step temperature initialized as well */
559         /* we do the same call as above, but turn the pressure off -- internally to
560            compute_globals, this is recognized as a velocity verlet half-step
561            kinetic energy calculation.  This minimized excess variables, but
562            perhaps loses some logic?*/
563
564         compute_globals(gstat, cr, ir, fr, ekind,
565                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
566                         mdatoms, nrnb, &vcm,
567                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
568                         constr, &nullSignaller, state->box,
569                         nullptr, &bSumEkinhOld,
570                         cglo_flags & ~CGLO_PRESSURE);
571     }
572
573     /* Calculate the initial half step temperature, and save the ekinh_old */
574     if (startingBehavior == StartingBehavior::NewSimulation)
575     {
576         for (i = 0; (i < ir->opts.ngtc); i++)
577         {
578             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
579         }
580     }
581
582     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
583        temperature control */
584     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
585
586     if (MASTER(cr))
587     {
588         if (!ir->bContinuation)
589         {
590             if (constr && ir->eConstrAlg == econtLINCS)
591             {
592                 fprintf(fplog,
593                         "RMS relative constraint deviation after constraining: %.2e\n",
594                         constr->rmsd());
595             }
596             if (EI_STATE_VELOCITY(ir->eI))
597             {
598                 real temp = enerd->term[F_TEMP];
599                 if (ir->eI != eiVV)
600                 {
601                     /* Result of Ekin averaged over velocities of -half
602                      * and +half step, while we only have -half step here.
603                      */
604                     temp *= 2;
605                 }
606                 fprintf(fplog, "Initial temperature: %g K\n", temp);
607             }
608         }
609
610         char tbuf[20];
611         fprintf(stderr, "starting mdrun '%s'\n",
612                 *(top_global->name));
613         if (ir->nsteps >= 0)
614         {
615             sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
616         }
617         else
618         {
619             sprintf(tbuf, "%s", "infinite");
620         }
621         if (ir->init_step > 0)
622         {
623             fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
624                     gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
625                     gmx_step_str(ir->init_step, sbuf2),
626                     ir->init_step*ir->delta_t);
627         }
628         else
629         {
630             fprintf(stderr, "%s steps, %s ps.\n",
631                     gmx_step_str(ir->nsteps, sbuf), tbuf);
632         }
633         fprintf(fplog, "\n");
634     }
635
636     walltime_accounting_start_time(walltime_accounting);
637     wallcycle_start(wcycle, ewcRUN);
638     print_start(fplog, cr, walltime_accounting, "mdrun");
639
640 #if GMX_FAHCORE
641     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
642     int chkpt_ret = fcCheckPointParallel( cr->nodeid,
643                                           NULL, 0);
644     if (chkpt_ret == 0)
645     {
646         gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
647     }
648 #endif
649
650     /***********************************************************
651      *
652      *             Loop over MD steps
653      *
654      ************************************************************/
655
656     bFirstStep       = TRUE;
657     /* Skip the first Nose-Hoover integration when we get the state from tpx */
658     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
659     bSumEkinhOld     = FALSE;
660     bExchanged       = FALSE;
661     bNeedRepartition = FALSE;
662
663     bool simulationsShareState = false;
664     int  nstSignalComm         = nstglobalcomm;
665     {
666         // TODO This implementation of ensemble orientation restraints is nasty because
667         // a user can't just do multi-sim with single-sim orientation restraints.
668         bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
669         bool awhUsesMultiSim         = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
670
671         // Replica exchange, ensemble restraints and AWH need all
672         // simulations to remain synchronized, so they need
673         // checkpoints and stop conditions to act on the same step, so
674         // the propagation of such signals must take place between
675         // simulations, not just within simulations.
676         // TODO: Make algorithm initializers set these flags.
677         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
678
679         if (simulationsShareState)
680         {
681             // Inter-simulation signal communication does not need to happen
682             // often, so we use a minimum of 200 steps to reduce overhead.
683             const int c_minimumInterSimulationSignallingInterval = 200;
684             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm;
685         }
686     }
687
688     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
689                 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
690                 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
691                 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
692
693     auto checkpointHandler = std::make_unique<CheckpointHandler>(
694                 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
695                 simulationsShareState, ir->nstlist == 0, MASTER(cr),
696                 mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period);
697
698     const bool resetCountersIsLocal = true;
699     auto       resetHandler         = std::make_unique<ResetHandler>(
700                 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal,
701                 ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
702                 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
703
704     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
705
706     step     = ir->init_step;
707     step_rel = 0;
708
709     // TODO extract this to new multi-simulation module
710     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
711     {
712         if (!multisim_int_all_are_equal(ms, ir->nsteps))
713         {
714             GMX_LOG(mdlog.warning).appendText(
715                     "Note: The number of steps is not consistent across multi simulations,\n"
716                     "but we are proceeding anyway!");
717         }
718         if (!multisim_int_all_are_equal(ms, ir->init_step))
719         {
720             GMX_LOG(mdlog.warning).appendText(
721                     "Note: The initial step is not consistent across multi simulations,\n"
722                     "but we are proceeding anyway!");
723         }
724     }
725
726     /* and stop now if we should */
727     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
728     while (!bLastStep)
729     {
730
731         /* Determine if this is a neighbor search step */
732         bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
733
734         if (bPMETune && bNStList)
735         {
736             /* PME grid + cut-off optimization with GPUs or PME nodes */
737             pme_loadbal_do(pme_loadbal, cr,
738                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
739                            fplog, mdlog,
740                            *ir, fr, *state,
741                            wcycle,
742                            step, step_rel,
743                            &bPMETunePrinting);
744         }
745
746         wallcycle_start(wcycle, ewcSTEP);
747
748         bLastStep = (step_rel == ir->nsteps);
749         t         = t0 + step*ir->delta_t;
750
751         // TODO Refactor this, so that nstfep does not need a default value of zero
752         if (ir->efep != efepNO || ir->bSimTemp)
753         {
754             /* find and set the current lambdas */
755             setCurrentLambdasLocal(step, ir->fepvals, lam0, state);
756
757             bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
758             bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
759             bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
760                             && (ir->bExpanded) && (step > 0) &&
761                             (startingBehavior == StartingBehavior::NewSimulation));
762         }
763
764         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep &&
765                      do_per_step(step, replExParams.exchangeInterval));
766
767         if (doSimulatedAnnealing)
768         {
769             update_annealing_target_temp(ir, t, &upd);
770         }
771
772         /* Stop Center of Mass motion */
773         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
774
775         /* Determine whether or not to do Neighbour Searching */
776         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
777
778         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
779
780         /* do_log triggers energy and virial calculation. Because this leads
781          * to different code paths, forces can be different. Thus for exact
782          * continuation we should avoid extra log output.
783          * Note that the || bLastStep can result in non-exact continuation
784          * beyond the last step. But we don't consider that to be an issue.
785          */
786         do_log     =
787             (do_per_step(step, ir->nstlog) ||
788              (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) ||
789              bLastStep);
790         do_verbose = mdrunOptions.verbose &&
791             (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
792
793         if (bNS && !(bFirstStep && ir->bContinuation))
794         {
795             bMasterState = FALSE;
796             /* Correct the new box if it is too skewed */
797             if (inputrecDynamicBox(ir))
798             {
799                 if (correct_box(fplog, step, state->box, graph))
800                 {
801                     bMasterState = TRUE;
802                 }
803             }
804             if (DOMAINDECOMP(cr) && bMasterState)
805             {
806                 dd_collect_state(cr->dd, state, state_global);
807             }
808
809             if (DOMAINDECOMP(cr))
810             {
811                 /* Repartition the domain decomposition */
812                 dd_partition_system(fplog, mdlog, step, cr,
813                                     bMasterState, nstglobalcomm,
814                                     state_global, *top_global, ir, imdSession,
815                                     pull_work,
816                                     state, &f, mdAtoms, &top, fr,
817                                     vsite, constr,
818                                     nrnb, wcycle,
819                                     do_verbose && !bPMETunePrinting);
820                 shouldCheckNumberOfBondedInteractions = true;
821                 upd.setNumAtoms(state->natoms);
822             }
823         }
824
825         if (MASTER(cr) && do_log)
826         {
827             energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
828         }
829
830         if (ir->efep != efepNO)
831         {
832             update_mdatoms(mdatoms, state->lambda[efptMASS]);
833         }
834
835         if (bExchanged)
836         {
837
838             /* We need the kinetic energy at minus the half step for determining
839              * the full step kinetic energy and possibly for T-coupling.*/
840             /* This may not be quite working correctly yet . . . . */
841             compute_globals(gstat, cr, ir, fr, ekind,
842                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
843                             mdatoms, nrnb, &vcm,
844                             wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
845                             constr, &nullSignaller, state->box,
846                             &totalNumberOfBondedInteractions, &bSumEkinhOld,
847                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
848             checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
849                                             top_global, &top, state->x.rvec_array(), state->box,
850                                             &shouldCheckNumberOfBondedInteractions);
851         }
852         clear_mat(force_vir);
853
854         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
855
856         /* Determine the energy and pressure:
857          * at nstcalcenergy steps and at energy output steps (set below).
858          */
859         if (EI_VV(ir->eI) && (!bInitStep))
860         {
861             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
862             bCalcVir      = bCalcEnerStep ||
863                 (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
864         }
865         else
866         {
867             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
868             bCalcVir      = bCalcEnerStep ||
869                 (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
870         }
871         bCalcEner = bCalcEnerStep;
872
873         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
874
875         if (do_ene || do_log || bDoReplEx)
876         {
877             bCalcVir  = TRUE;
878             bCalcEner = TRUE;
879         }
880
881         /* Do we need global communication ? */
882         bGStat = (bCalcVir || bCalcEner || bStopCM ||
883                   do_per_step(step, nstglobalcomm) ||
884                   (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
885
886         force_flags = (GMX_FORCE_STATECHANGED |
887                        ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) |
888                        GMX_FORCE_ALLFORCES |
889                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
890                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
891                        (bDoFEP ? GMX_FORCE_DHDL : 0)
892                        );
893
894         if (shellfc)
895         {
896             /* Now is the time to relax the shells */
897             relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose,
898                                 enforcedRotation, step,
899                                 ir, imdSession, pull_work, bNS, force_flags, &top,
900                                 constr, enerd, fcd,
901                                 state->natoms,
902                                 state->x.arrayRefWithPadding(),
903                                 state->v.arrayRefWithPadding(),
904                                 state->box,
905                                 state->lambda,
906                                 &state->hist,
907                                 f.arrayRefWithPadding(), force_vir, mdatoms,
908                                 nrnb, wcycle, graph,
909                                 shellfc, fr, mdScheduleWork, t, mu_tot,
910                                 vsite,
911                                 ddBalanceRegionHandler);
912         }
913         else
914         {
915             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated
916                (or the AWH update will be performed twice for one step when continuing). It would be best to
917                call this update function from do_md_trajectory_writing but that would occur after do_force.
918                One would have to divide the update_awh function into one function applying the AWH force
919                and one doing the AWH bias update. The update AWH bias function could then be called after
920                do_md_trajectory_writing (then containing update_awh_history).
921                The checkpointing will in the future probably moved to the start of the md loop which will
922                rid of this issue. */
923             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
924             {
925                 awh->updateHistory(state_global->awhHistory.get());
926             }
927
928             /* The coordinates (x) are shifted (to get whole molecules)
929              * in do_force.
930              * This is parallellized as well, and does communication too.
931              * Check comments in sim_util.c
932              */
933             do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession,
934                      pull_work,
935                      step, nrnb, wcycle, &top,
936                      state->box, state->x.arrayRefWithPadding(), &state->hist,
937                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
938                      state->lambda, graph,
939                      fr, mdScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
940                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
941                      ddBalanceRegionHandler);
942         }
943
944         // VV integrators do not need the following velocity half step
945         // if it is the first step after starting from a checkpoint.
946         // That is, the half step is needed on all other steps, and
947         // also the first step when starting from a .tpr file.
948         if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
949         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
950         {
951             rvec *vbuf = nullptr;
952
953             wallcycle_start(wcycle, ewcUPDATE);
954             if (ir->eI == eiVV && bInitStep)
955             {
956                 /* if using velocity verlet with full time step Ekin,
957                  * take the first half step only to compute the
958                  * virial for the first step. From there,
959                  * revert back to the initial coordinates
960                  * so that the input is actually the initial step.
961                  */
962                 snew(vbuf, state->natoms);
963                 copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */
964             }
965             else
966             {
967                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
968                 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
969             }
970
971             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
972                           ekind, M, &upd, etrtVELOCITY1,
973                           cr, constr);
974
975             wallcycle_stop(wcycle, ewcUPDATE);
976             constrain_velocities(step, nullptr,
977                                  state,
978                                  shake_vir,
979                                  constr,
980                                  bCalcVir, do_log, do_ene);
981             wallcycle_start(wcycle, ewcUPDATE);
982             /* if VV, compute the pressure and constraints */
983             /* For VV2, we strictly only need this if using pressure
984              * control, but we really would like to have accurate pressures
985              * printed out.
986              * Think about ways around this in the future?
987              * For now, keep this choice in comments.
988              */
989             /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
990             /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
991             bPres = TRUE;
992             bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
993             if (bCalcEner && ir->eI == eiVVAK)
994             {
995                 bSumEkinhOld = TRUE;
996             }
997             /* for vv, the first half of the integration actually corresponds to the previous step.
998                So we need information from the last step in the first half of the integration */
999             if (bGStat || do_per_step(step-1, nstglobalcomm))
1000             {
1001                 wallcycle_stop(wcycle, ewcUPDATE);
1002                 compute_globals(gstat, cr, ir, fr, ekind,
1003                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1004                                 mdatoms, nrnb, &vcm,
1005                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1006                                 constr, &nullSignaller, state->box,
1007                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1008                                 (bGStat ? CGLO_GSTAT : 0)
1009                                 | (bCalcEner ? CGLO_ENERGY : 0)
1010                                 | (bTemp ? CGLO_TEMPERATURE : 0)
1011                                 | (bPres ? CGLO_PRESSURE : 0)
1012                                 | (bPres ? CGLO_CONSTRAINT : 0)
1013                                 | (bStopCM ? CGLO_STOPCM : 0)
1014                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1015                                 | CGLO_SCALEEKIN
1016                                 );
1017                 /* explanation of above:
1018                    a) We compute Ekin at the full time step
1019                    if 1) we are using the AveVel Ekin, and it's not the
1020                    initial step, or 2) if we are using AveEkin, but need the full
1021                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1022                    b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
1023                    EkinAveVel because it's needed for the pressure */
1024                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1025                                                 top_global, &top, state->x.rvec_array(), state->box,
1026                                                 &shouldCheckNumberOfBondedInteractions);
1027                 if (bStopCM)
1028                 {
1029                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1030                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1031                 }
1032                 wallcycle_start(wcycle, ewcUPDATE);
1033             }
1034             /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1035             if (!bInitStep)
1036             {
1037                 if (bTrotter)
1038                 {
1039                     m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
1040                     trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
1041
1042                     /* TODO This is only needed when we're about to write
1043                      * a checkpoint, because we use it after the restart
1044                      * (in a kludge?). But what should we be doing if
1045                      * the startingBehavior is NewSimulation or bInitStep are true? */
1046                     if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1047                     {
1048                         copy_mat(shake_vir, state->svir_prev);
1049                         copy_mat(force_vir, state->fvir_prev);
1050                     }
1051                     if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1052                     {
1053                         /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1054                         enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1055                         enerd->term[F_EKIN] = trace(ekind->ekin);
1056                     }
1057                 }
1058                 else if (bExchanged)
1059                 {
1060                     wallcycle_stop(wcycle, ewcUPDATE);
1061                     /* We need the kinetic energy at minus the half step for determining
1062                      * the full step kinetic energy and possibly for T-coupling.*/
1063                     /* This may not be quite working correctly yet . . . . */
1064                     compute_globals(gstat, cr, ir, fr, ekind,
1065                                     state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1066                                     mdatoms, nrnb, &vcm,
1067                                     wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
1068                                     constr, &nullSignaller, state->box,
1069                                     nullptr, &bSumEkinhOld,
1070                                     CGLO_GSTAT | CGLO_TEMPERATURE);
1071                     wallcycle_start(wcycle, ewcUPDATE);
1072                 }
1073             }
1074             /* if it's the initial step, we performed this first step just to get the constraint virial */
1075             if (ir->eI == eiVV && bInitStep)
1076             {
1077                 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1078                 sfree(vbuf);
1079             }
1080             wallcycle_stop(wcycle, ewcUPDATE);
1081         }
1082
1083         /* compute the conserved quantity */
1084         if (EI_VV(ir->eI))
1085         {
1086             saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1087             if (ir->eI == eiVV)
1088             {
1089                 last_ekin = enerd->term[F_EKIN];
1090             }
1091             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1092             {
1093                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1094             }
1095             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1096             if (ir->efep != efepNO)
1097             {
1098                 sum_dhdl(enerd, state->lambda, ir->fepvals);
1099             }
1100         }
1101
1102         /* ########  END FIRST UPDATE STEP  ############## */
1103         /* ########  If doing VV, we now have v(dt) ###### */
1104         if (bDoExpanded)
1105         {
1106             /* perform extended ensemble sampling in lambda - we don't
1107                actually move to the new state before outputting
1108                statistics, but if performing simulated tempering, we
1109                do update the velocities and the tau_t. */
1110
1111             lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms);
1112             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1113             if (MASTER(cr))
1114             {
1115                 copy_df_history(state_global->dfhist, state->dfhist);
1116             }
1117         }
1118
1119         /* Now we have the energies and forces corresponding to the
1120          * coordinates at time t. We must output all of this before
1121          * the update.
1122          */
1123         do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
1124                                  ir, state, state_global, observablesHistory,
1125                                  top_global, fr,
1126                                  outf, energyOutput, ekind, f,
1127                                  checkpointHandler->isCheckpointingStep(),
1128                                  bRerunMD, bLastStep,
1129                                  mdrunOptions.writeConfout,
1130                                  bSumEkinhOld);
1131         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1132         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1133
1134         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1135         if (startingBehavior != StartingBehavior::NewSimulation &&
1136             (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1137         {
1138             copy_mat(state->svir_prev, shake_vir);
1139             copy_mat(state->fvir_prev, force_vir);
1140         }
1141
1142         stopHandler->setSignal();
1143         resetHandler->setSignal(walltime_accounting);
1144
1145         if (bGStat || !PAR(cr))
1146         {
1147             /* In parallel we only have to check for checkpointing in steps
1148              * where we do global communication,
1149              *  otherwise the other nodes don't know.
1150              */
1151             checkpointHandler->setSignal(walltime_accounting);
1152         }
1153
1154         /* #########   START SECOND UPDATE STEP ################# */
1155
1156         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
1157            in preprocessing */
1158
1159         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1160         {
1161             gmx_bool bIfRandomize;
1162             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1163             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1164             if (constr && bIfRandomize)
1165             {
1166                 constrain_velocities(step, nullptr,
1167                                      state,
1168                                      tmp_vir,
1169                                      constr,
1170                                      bCalcVir, do_log, do_ene);
1171             }
1172         }
1173         /* Box is changed in update() when we do pressure coupling,
1174          * but we should still use the old box for energy corrections and when
1175          * writing it to the energy file, so it matches the trajectory files for
1176          * the same timestep above. Make a copy in a separate array.
1177          */
1178         copy_mat(state->box, lastbox);
1179
1180         dvdl_constr = 0;
1181
1182         wallcycle_start(wcycle, ewcUPDATE);
1183         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1184         if (bTrotter)
1185         {
1186             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1187             /* We can only do Berendsen coupling after we have summed
1188              * the kinetic energy or virial. Since the happens
1189              * in global_state after update, we should only do it at
1190              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1191              */
1192         }
1193         else
1194         {
1195             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1196             update_pcouple_before_coordinates(fplog, step, ir, state,
1197                                               parrinellorahmanMu, M,
1198                                               bInitStep);
1199         }
1200
1201         if (EI_VV(ir->eI))
1202         {
1203             /* velocity half-step update */
1204             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1205                           ekind, M, &upd, etrtVELOCITY2,
1206                           cr, constr);
1207         }
1208
1209         /* Above, initialize just copies ekinh into ekin,
1210          * it doesn't copy position (for VV),
1211          * and entire integrator for MD.
1212          */
1213
1214         if (ir->eI == eiVVAK)
1215         {
1216             cbuf.resize(state->x.size());
1217             std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1218         }
1219
1220         if (c_useGpuUpdateConstrain)
1221         {
1222             if (bNS)
1223             {
1224                 integrator->set(top.idef, *mdatoms, ekind->ngtc);
1225                 t_pbc pbc;
1226                 set_pbc(&pbc, epbcXYZ, state->box);
1227                 integrator->setPbc(&pbc);
1228             }
1229             integrator->copyCoordinatesToGpu(state->x.rvec_array());
1230             integrator->copyVelocitiesToGpu(state->v.rvec_array());
1231             integrator->copyForcesToGpu(as_rvec_array(f.data()));
1232
1233             bool doTempCouple     = (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1234             bool doPressureCouple = (ir->epc == epcPARRINELLORAHMAN && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1235
1236             // This applies Leap-Frog, LINCS and SETTLE in succession
1237             integrator->integrate(ir->delta_t, true, bCalcVir, shake_vir,
1238                                   doTempCouple, ekind->tcstat,
1239                                   doPressureCouple, ir->nstpcouple*ir->delta_t, M);
1240
1241             integrator->copyCoordinatesFromGpu(state->x.rvec_array());
1242             integrator->copyVelocitiesFromGpu(state->v.rvec_array());
1243         }
1244         else
1245         {
1246             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1247                           ekind, M, &upd, etrtPOSITION, cr, constr);
1248
1249             wallcycle_stop(wcycle, ewcUPDATE);
1250
1251             constrain_coordinates(step, &dvdl_constr, state,
1252                                   shake_vir,
1253                                   &upd, constr,
1254                                   bCalcVir, do_log, do_ene);
1255
1256             update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
1257                                   cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
1258             finish_update(ir, mdatoms,
1259                           state, graph,
1260                           nrnb, wcycle, &upd, constr);
1261         }
1262
1263         if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1264         {
1265             updatePrevStepPullCom(pull_work, state);
1266         }
1267
1268         if (ir->eI == eiVVAK)
1269         {
1270             /* erase F_EKIN and F_TEMP here? */
1271             /* just compute the kinetic energy at the half step to perform a trotter step */
1272             compute_globals(gstat, cr, ir, fr, ekind,
1273                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1274                             mdatoms, nrnb, &vcm,
1275                             wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1276                             constr, &nullSignaller, lastbox,
1277                             nullptr, &bSumEkinhOld,
1278                             (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
1279                             );
1280             wallcycle_start(wcycle, ewcUPDATE);
1281             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1282             /* now we know the scaling, we can compute the positions again */
1283             std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1284
1285             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1286                           ekind, M, &upd, etrtPOSITION, cr, constr);
1287             wallcycle_stop(wcycle, ewcUPDATE);
1288
1289             /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1290             /* are the small terms in the shake_vir here due
1291              * to numerical errors, or are they important
1292              * physically? I'm thinking they are just errors, but not completely sure.
1293              * For now, will call without actually constraining, constr=NULL*/
1294             finish_update(ir, mdatoms,
1295                           state, graph,
1296                           nrnb, wcycle, &upd, nullptr);
1297         }
1298         if (EI_VV(ir->eI))
1299         {
1300             /* this factor or 2 correction is necessary
1301                because half of the constraint force is removed
1302                in the vv step, so we have to double it.  See
1303                the Redmine issue #1255.  It is not yet clear
1304                if the factor of 2 is exact, or just a very
1305                good approximation, and this will be
1306                investigated.  The next step is to see if this
1307                can be done adding a dhdl contribution from the
1308                rattle step, but this is somewhat more
1309                complicated with the current code. Will be
1310                investigated, hopefully for 4.6.3. However,
1311                this current solution is much better than
1312                having it completely wrong.
1313              */
1314             enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
1315         }
1316         else
1317         {
1318             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1319         }
1320
1321         if (vsite != nullptr)
1322         {
1323             wallcycle_start(wcycle, ewcVSITECONSTR);
1324             if (graph != nullptr)
1325             {
1326                 shift_self(graph, state->box, state->x.rvec_array());
1327             }
1328             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1329                              top.idef.iparams, top.idef.il,
1330                              fr->ePBC, fr->bMolPBC, cr, state->box);
1331
1332             if (graph != nullptr)
1333             {
1334                 unshift_self(graph, state->box, state->x.rvec_array());
1335             }
1336             wallcycle_stop(wcycle, ewcVSITECONSTR);
1337         }
1338
1339         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1340         /* With Leap-Frog we can skip compute_globals at
1341          * non-communication steps, but we need to calculate
1342          * the kinetic energy one step before communication.
1343          */
1344         {
1345             // Organize to do inter-simulation signalling on steps if
1346             // and when algorithms require it.
1347             bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1348
1349             if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
1350             {
1351                 // Since we're already communicating at this step, we
1352                 // can propagate intra-simulation signals. Note that
1353                 // check_nstglobalcomm has the responsibility for
1354                 // choosing the value of nstglobalcomm that is one way
1355                 // bGStat becomes true, so we can't get into a
1356                 // situation where e.g. checkpointing can't be
1357                 // signalled.
1358                 bool                doIntraSimSignal = true;
1359                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1360
1361                 compute_globals(gstat, cr, ir, fr, ekind,
1362                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1363                                 mdatoms, nrnb, &vcm,
1364                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1365                                 constr, &signaller,
1366                                 lastbox,
1367                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1368                                 (bGStat ? CGLO_GSTAT : 0)
1369                                 | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1370                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1371                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1372                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
1373                                 | CGLO_CONSTRAINT
1374                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1375                                 );
1376                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1377                                                 top_global, &top, state->x.rvec_array(), state->box,
1378                                                 &shouldCheckNumberOfBondedInteractions);
1379                 if (!EI_VV(ir->eI) && bStopCM)
1380                 {
1381                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1382                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1383                 }
1384             }
1385         }
1386
1387         /* #############  END CALC EKIN AND PRESSURE ################# */
1388
1389         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1390            the virial that should probably be addressed eventually. state->veta has better properies,
1391            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1392            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1393
1394         if (ir->efep != efepNO && !EI_VV(ir->eI))
1395         {
1396             /* Sum up the foreign energy and dhdl terms for md and sd.
1397                Currently done every step so that dhdl is correct in the .edr */
1398             sum_dhdl(enerd, state->lambda, ir->fepvals);
1399         }
1400
1401         update_pcouple_after_coordinates(fplog, step, ir, mdatoms,
1402                                          pres, force_vir, shake_vir,
1403                                          parrinellorahmanMu,
1404                                          state, nrnb, &upd);
1405
1406         /* ################# END UPDATE STEP 2 ################# */
1407         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1408
1409         /* The coordinates (x) were unshifted in update */
1410         if (!bGStat)
1411         {
1412             /* We will not sum ekinh_old,
1413              * so signal that we still have to do it.
1414              */
1415             bSumEkinhOld = TRUE;
1416         }
1417
1418         if (bCalcEner)
1419         {
1420             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1421
1422             /* use the directly determined last velocity, not actually the averaged half steps */
1423             if (bTrotter && ir->eI == eiVV)
1424             {
1425                 enerd->term[F_EKIN] = last_ekin;
1426             }
1427             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1428
1429             if (integratorHasConservedEnergyQuantity(ir))
1430             {
1431                 if (EI_VV(ir->eI))
1432                 {
1433                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1434                 }
1435                 else
1436                 {
1437                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1438                 }
1439             }
1440             /* #########  END PREPARING EDR OUTPUT  ###########  */
1441         }
1442
1443         /* Output stuff */
1444         if (MASTER(cr))
1445         {
1446             if (fplog && do_log && bDoExpanded)
1447             {
1448                 /* only needed if doing expanded ensemble */
1449                 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr,
1450                                           state_global->dfhist, state->fep_state, ir->nstlog, step);
1451             }
1452             if (bCalcEner)
1453             {
1454                 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep,
1455                                                  t, mdatoms->tmass, enerd, state,
1456                                                  ir->fepvals, ir->expandedvals, lastbox,
1457                                                  shake_vir, force_vir, total_vir, pres,
1458                                                  ekind, mu_tot, constr);
1459             }
1460             else
1461             {
1462                 energyOutput.recordNonEnergyStep();
1463             }
1464
1465             gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
1466             gmx_bool do_or  = do_per_step(step, ir->nstorireout);
1467
1468             energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1469             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1470                                                do_log ? fplog : nullptr,
1471                                                step, t,
1472                                                fcd, awh.get());
1473
1474             if (ir->bPull)
1475             {
1476                 pull_print_output(pull_work, step, t);
1477             }
1478
1479             if (do_per_step(step, ir->nstlog))
1480             {
1481                 if (fflush(fplog) != 0)
1482                 {
1483                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1484                 }
1485             }
1486         }
1487         if (bDoExpanded)
1488         {
1489             /* Have to do this part _after_ outputting the logfile and the edr file */
1490             /* Gets written into the state at the beginning of next loop*/
1491             state->fep_state = lamnew;
1492         }
1493         /* Print the remaining wall clock time for the run */
1494         if (isMasterSimMasterRank(ms, MASTER(cr)) &&
1495             (do_verbose || gmx_got_usr_signal()) &&
1496             !bPMETunePrinting)
1497         {
1498             if (shellfc)
1499             {
1500                 fprintf(stderr, "\n");
1501             }
1502             print_time(stderr, walltime_accounting, step, ir, cr);
1503         }
1504
1505         /* Ion/water position swapping.
1506          * Not done in last step since trajectory writing happens before this call
1507          * in the MD loop and exchanges would be lost anyway. */
1508         bNeedRepartition = FALSE;
1509         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
1510             do_per_step(step, ir->swap->nstswap))
1511         {
1512             bNeedRepartition = do_swapcoords(cr, step, t, ir, swap, wcycle,
1513                                              as_rvec_array(state->x.data()),
1514                                              state->box,
1515                                              MASTER(cr) && mdrunOptions.verbose,
1516                                              bRerunMD);
1517
1518             if (bNeedRepartition && DOMAINDECOMP(cr))
1519             {
1520                 dd_collect_state(cr->dd, state, state_global);
1521             }
1522         }
1523
1524         /* Replica exchange */
1525         bExchanged = FALSE;
1526         if (bDoReplEx)
1527         {
1528             bExchanged = replica_exchange(fplog, cr, ms, repl_ex,
1529                                           state_global, enerd,
1530                                           state, step, t);
1531         }
1532
1533         if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
1534         {
1535             dd_partition_system(fplog, mdlog, step, cr, TRUE, 1,
1536                                 state_global, *top_global, ir, imdSession,
1537                                 pull_work,
1538                                 state, &f, mdAtoms, &top, fr,
1539                                 vsite, constr,
1540                                 nrnb, wcycle, FALSE);
1541             shouldCheckNumberOfBondedInteractions = true;
1542             upd.setNumAtoms(state->natoms);
1543         }
1544
1545         bFirstStep             = FALSE;
1546         bInitStep              = FALSE;
1547
1548         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1549         /* With all integrators, except VV, we need to retain the pressure
1550          * at the current step for coupling at the next step.
1551          */
1552         if ((state->flags & (1U<<estPRES_PREV)) &&
1553             (bGStatEveryStep ||
1554              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1555         {
1556             /* Store the pressure in t_state for pressure coupling
1557              * at the next MD step.
1558              */
1559             copy_mat(pres, state->pres_prev);
1560         }
1561
1562         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1563
1564         if ( (membed != nullptr) && (!bLastStep) )
1565         {
1566             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1567         }
1568
1569         cycles = wallcycle_stop(wcycle, ewcSTEP);
1570         if (DOMAINDECOMP(cr) && wcycle)
1571         {
1572             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1573         }
1574
1575         /* increase the MD step number */
1576         step++;
1577         step_rel++;
1578
1579         resetHandler->resetCounters(
1580                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(),
1581                 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1582
1583         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1584         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1585
1586     }
1587     /* End of main MD loop */
1588
1589     /* Closing TNG files can include compressing data. Therefore it is good to do that
1590      * before stopping the time measurements. */
1591     mdoutf_tng_close(outf);
1592
1593     /* Stop measuring walltime */
1594     walltime_accounting_end_time(walltime_accounting);
1595
1596     if (!thisRankHasDuty(cr, DUTY_PME))
1597     {
1598         /* Tell the PME only node to finish */
1599         gmx_pme_send_finish(cr);
1600     }
1601
1602     if (MASTER(cr))
1603     {
1604         if (ir->nstcalcenergy > 0)
1605         {
1606             energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1607             energyOutput.printAverages(fplog, groups);
1608         }
1609     }
1610     done_mdoutf(outf);
1611
1612     if (bPMETune)
1613     {
1614         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1615     }
1616
1617     done_shellfc(fplog, shellfc, step_rel);
1618
1619     if (useReplicaExchange && MASTER(cr))
1620     {
1621         print_replica_exchange_statistics(fplog, repl_ex);
1622     }
1623
1624     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1625
1626     global_stat_destroy(gstat);
1627
1628 }