f5c247089a0bf13c6b622afcae973e45b8e96e9f
[alexxy/gromacs.git] / src / gromacs / mdrun / md.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the integrator for normal molecular dynamics simulations
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53
54 #include "gromacs/awh/awh.h"
55 #include "gromacs/commandline/filenm.h"
56 #include "gromacs/domdec/collect.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec.h"
59 #include "gromacs/domdec/domdec_network.h"
60 #include "gromacs/domdec/domdec_struct.h"
61 #include "gromacs/domdec/mdsetup.h"
62 #include "gromacs/domdec/partition.h"
63 #include "gromacs/essentialdynamics/edsam.h"
64 #include "gromacs/ewald/pme.h"
65 #include "gromacs/ewald/pme_load_balancing.h"
66 #include "gromacs/fileio/trxio.h"
67 #include "gromacs/gmxlib/network.h"
68 #include "gromacs/gmxlib/nrnb.h"
69 #include "gromacs/gpu_utils/gpu_utils.h"
70 #include "gromacs/imd/imd.h"
71 #include "gromacs/listed_forces/manage_threading.h"
72 #include "gromacs/math/functions.h"
73 #include "gromacs/math/utilities.h"
74 #include "gromacs/math/vec.h"
75 #include "gromacs/math/vectypes.h"
76 #include "gromacs/mdlib/checkpointhandler.h"
77 #include "gromacs/mdlib/compute_io.h"
78 #include "gromacs/mdlib/constr.h"
79 #include "gromacs/mdlib/ebin.h"
80 #include "gromacs/mdlib/enerdata_utils.h"
81 #include "gromacs/mdlib/energyoutput.h"
82 #include "gromacs/mdlib/expanded.h"
83 #include "gromacs/mdlib/force.h"
84 #include "gromacs/mdlib/force_flags.h"
85 #include "gromacs/mdlib/forcerec.h"
86 #include "gromacs/mdlib/md_support.h"
87 #include "gromacs/mdlib/mdatoms.h"
88 #include "gromacs/mdlib/mdoutf.h"
89 #include "gromacs/mdlib/membed.h"
90 #include "gromacs/mdlib/resethandler.h"
91 #include "gromacs/mdlib/sighandler.h"
92 #include "gromacs/mdlib/simulationsignal.h"
93 #include "gromacs/mdlib/stat.h"
94 #include "gromacs/mdlib/stophandler.h"
95 #include "gromacs/mdlib/tgroup.h"
96 #include "gromacs/mdlib/trajectory_writing.h"
97 #include "gromacs/mdlib/update.h"
98 #include "gromacs/mdlib/update_constrain_cuda.h"
99 #include "gromacs/mdlib/vcm.h"
100 #include "gromacs/mdlib/vsite.h"
101 #include "gromacs/mdrunutility/handlerestart.h"
102 #include "gromacs/mdrunutility/multisim.h"
103 #include "gromacs/mdrunutility/printtime.h"
104 #include "gromacs/mdtypes/awh_history.h"
105 #include "gromacs/mdtypes/awh_params.h"
106 #include "gromacs/mdtypes/commrec.h"
107 #include "gromacs/mdtypes/df_history.h"
108 #include "gromacs/mdtypes/energyhistory.h"
109 #include "gromacs/mdtypes/fcdata.h"
110 #include "gromacs/mdtypes/forcerec.h"
111 #include "gromacs/mdtypes/group.h"
112 #include "gromacs/mdtypes/inputrec.h"
113 #include "gromacs/mdtypes/interaction_const.h"
114 #include "gromacs/mdtypes/md_enums.h"
115 #include "gromacs/mdtypes/mdatom.h"
116 #include "gromacs/mdtypes/mdrunoptions.h"
117 #include "gromacs/mdtypes/observableshistory.h"
118 #include "gromacs/mdtypes/pullhistory.h"
119 #include "gromacs/mdtypes/state.h"
120 #include "gromacs/nbnxm/nbnxm.h"
121 #include "gromacs/pbcutil/mshift.h"
122 #include "gromacs/pbcutil/pbc.h"
123 #include "gromacs/pulling/output.h"
124 #include "gromacs/pulling/pull.h"
125 #include "gromacs/swap/swapcoords.h"
126 #include "gromacs/timing/wallcycle.h"
127 #include "gromacs/timing/walltime_accounting.h"
128 #include "gromacs/topology/atoms.h"
129 #include "gromacs/topology/idef.h"
130 #include "gromacs/topology/mtop_util.h"
131 #include "gromacs/topology/topology.h"
132 #include "gromacs/trajectory/trajectoryframe.h"
133 #include "gromacs/utility/basedefinitions.h"
134 #include "gromacs/utility/cstringutil.h"
135 #include "gromacs/utility/fatalerror.h"
136 #include "gromacs/utility/logger.h"
137 #include "gromacs/utility/real.h"
138 #include "gromacs/utility/smalloc.h"
139
140 #include "legacysimulator.h"
141 #include "replicaexchange.h"
142 #include "shellfc.h"
143
144 #if GMX_FAHCORE
145 #include "corewrap.h"
146 #endif
147
148 using gmx::SimulationSignaller;
149
150 //! Whether the GPU versions of Leap-Frog integrator and LINCS and SHAKE constraints
151 static const bool c_useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
152
153 void gmx::LegacySimulator::do_md()
154 {
155     // TODO Historically, the EM and MD "integrators" used different
156     // names for the t_inputrec *parameter, but these must have the
157     // same name, now that it's a member of a struct. We use this ir
158     // alias to avoid a large ripple of nearly useless changes.
159     // t_inputrec is being replaced by IMdpOptionsProvider, so this
160     // will go away eventually.
161     t_inputrec             *ir   = inputrec;
162     int64_t                 step, step_rel;
163     double                  t, t0 = ir->init_t, lam0[efptNR];
164     gmx_bool                bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
165     gmx_bool                bNS, bNStList, bStopCM,
166                             bFirstStep, bInitStep, bLastStep = FALSE;
167     gmx_bool                bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
168     gmx_bool                do_ene, do_log, do_verbose;
169     gmx_bool                bMasterState;
170     unsigned int            force_flags;
171     tensor                  force_vir = {{0}}, shake_vir = {{0}}, total_vir = {{0}},
172                             tmp_vir   = {{0}}, pres = {{0}};
173     int                     i, m;
174     rvec                    mu_tot;
175     matrix                  parrinellorahmanMu, M;
176     gmx_repl_ex_t           repl_ex = nullptr;
177     gmx_localtop_t          top;
178     PaddedVector<gmx::RVec> f {};
179     gmx_global_stat_t       gstat;
180     t_graph                *graph = nullptr;
181     gmx_shellfc_t          *shellfc;
182     gmx_bool                bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
183     gmx_bool                bTemp, bPres, bTrotter;
184     real                    dvdl_constr;
185     std::vector<RVec>       cbuf;
186     matrix                  lastbox;
187     int                     lamnew  = 0;
188     /* for FEP */
189     int                     nstfep = 0;
190     double                  cycles;
191     real                    saved_conserved_quantity = 0;
192     real                    last_ekin                = 0;
193     t_extmass               MassQ;
194     char                    sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
195
196     /* PME load balancing data for GPU kernels */
197     gmx_bool              bPMETune         = FALSE;
198     gmx_bool              bPMETunePrinting = FALSE;
199
200     bool                  bInteractiveMDstep = false;
201
202     /* Domain decomposition could incorrectly miss a bonded
203        interaction, but checking for that requires a global
204        communication stage, which does not otherwise happen in DD
205        code. So we do that alongside the first global energy reduction
206        after a new DD is made. These variables handle whether the
207        check happens, and the result it returns. */
208     bool              shouldCheckNumberOfBondedInteractions = false;
209     int               totalNumberOfBondedInteractions       = -1;
210
211     SimulationSignals signals;
212     // Most global communnication stages don't propagate mdrun
213     // signals, and will use this object to achieve that.
214     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
215
216     if (!mdrunOptions.writeConfout)
217     {
218         // This is on by default, and the main known use case for
219         // turning it off is for convenience in benchmarking, which is
220         // something that should not show up in the general user
221         // interface.
222         GMX_LOG(mdlog.info).asParagraph().
223             appendText("The -noconfout functionality is deprecated, and may be removed in a future version.");
224     }
225
226     /* md-vv uses averaged full step velocities for T-control
227        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
228        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
229     bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
230
231     const bool bRerunMD      = false;
232
233     int        nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
234     bGStatEveryStep = (nstglobalcomm == 1);
235
236     SimulationGroups                  *groups = &top_global->groups;
237
238     std::unique_ptr<EssentialDynamics> ed = nullptr;
239     if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr)
240     {
241         /* Initialize essential dynamics sampling */
242         ed = init_edsam(mdlog,
243                         opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm),
244                         top_global,
245                         ir, cr, constr,
246                         state_global, observablesHistory,
247                         oenv,
248                         startingBehavior);
249     }
250
251     initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
252     Update upd(ir, deform);
253     bool   doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
254     if (startingBehavior != StartingBehavior::RestartWithAppending)
255     {
256         pleaseCiteCouplingAlgorithms(fplog, *ir);
257     }
258     gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, top_global, oenv, wcycle,
259                                          StartingBehavior::NewSimulation);
260     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, mdoutf_get_fp_dhdl(outf), false);
261
262     gstat = global_stat_init(ir);
263
264     /* Check for polarizable models and flexible constraints */
265     shellfc = init_shell_flexcon(fplog,
266                                  top_global, constr ? constr->numFlexibleConstraints() : 0,
267                                  ir->nstcalcenergy, DOMAINDECOMP(cr));
268
269     {
270         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
271         if ((io > 2000) && MASTER(cr))
272         {
273             fprintf(stderr,
274                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
275                     io);
276         }
277     }
278
279     // Local state only becomes valid now.
280     std::unique_ptr<t_state> stateInstance;
281     t_state *                state;
282
283
284     auto mdatoms = mdAtoms->mdatoms();
285
286     std::unique_ptr<UpdateConstrainCuda> integrator;
287
288     if (DOMAINDECOMP(cr))
289     {
290         dd_init_local_top(*top_global, &top);
291
292         stateInstance = std::make_unique<t_state>();
293         state         = stateInstance.get();
294         dd_init_local_state(cr->dd, state_global, state);
295
296         /* Distribute the charge groups over the nodes from the master node */
297         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
298                             state_global, *top_global, ir, imdSession,
299                             pull_work,
300                             state, &f, mdAtoms, &top, fr,
301                             vsite, constr,
302                             nrnb, nullptr, FALSE);
303         shouldCheckNumberOfBondedInteractions = true;
304         upd.setNumAtoms(state->natoms);
305     }
306     else
307     {
308         state_change_natoms(state_global, state_global->natoms);
309         f.resizeWithPadding(state_global->natoms);
310         /* Copy the pointer to the global state */
311         state = state_global;
312
313         /* Generate and initialize new topology */
314         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr,
315                                   &graph, mdAtoms, constr, vsite, shellfc);
316
317         upd.setNumAtoms(state->natoms);
318     }
319
320     if (c_useGpuUpdateConstrain)
321     {
322         GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
323         GMX_RELEASE_ASSERT(ir->etc == etcNO, "Temperature coupling is not supported on the GPU.");
324         GMX_RELEASE_ASSERT(ir->epc == epcNO, "Pressure coupling is not supported on the GPU.");
325         GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
326         GMX_LOG(mdlog.info).asParagraph().
327             appendText("Using CUDA GPU-based update and constraints module.");
328         integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global);
329         integrator->set(top.idef, *mdatoms);
330         t_pbc pbc;
331         set_pbc(&pbc, epbcXYZ, state->box);
332         integrator->setPbc(&pbc);
333     }
334
335     if (fr->nbv->useGpu())
336     {
337         changePinningPolicy(&state->x, gmx::PinningPolicy::PinnedIfSupported);
338     }
339
340     // NOTE: The global state is no longer used at this point.
341     // But state_global is still used as temporary storage space for writing
342     // the global state to file and potentially for replica exchange.
343     // (Global topology should persist.)
344
345     update_mdatoms(mdatoms, state->lambda[efptMASS]);
346
347     if (ir->bExpanded)
348     {
349         /* Check nstexpanded here, because the grompp check was broken */
350         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
351         {
352             gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
353         }
354         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation,
355                                ir, state->dfhist);
356     }
357
358     if (MASTER(cr))
359     {
360         if (startingBehavior != StartingBehavior::NewSimulation)
361         {
362             /* Restore from energy history if appending to output files */
363             if (startingBehavior == StartingBehavior::RestartWithAppending)
364             {
365                 /* If no history is available (because a checkpoint is from before
366                  * it was written) make a new one later, otherwise restore it.
367                  */
368                 if (observablesHistory->energyHistory)
369                 {
370                     energyOutput.restoreFromEnergyHistory(*observablesHistory->energyHistory);
371                 }
372             }
373             else if (observablesHistory->energyHistory)
374             {
375                 /* We might have read an energy history from checkpoint.
376                  * As we are not appending, we want to restart the statistics.
377                  * Free the allocated memory and reset the counts.
378                  */
379                 observablesHistory->energyHistory = {};
380                 /* We might have read a pull history from checkpoint.
381                  * We will still want to keep the statistics, so that the files
382                  * can be joined and still be meaningful.
383                  * This means that observablesHistory->pullHistory
384                  * should not be reset.
385                  */
386             }
387         }
388         if (!observablesHistory->energyHistory)
389         {
390             observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
391         }
392         if (!observablesHistory->pullHistory)
393         {
394             observablesHistory->pullHistory = std::make_unique<PullHistory>();
395         }
396         /* Set the initial energy history */
397         energyOutput.fillEnergyHistory(observablesHistory->energyHistory.get());
398     }
399
400     preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
401                            startingBehavior != StartingBehavior::NewSimulation);
402
403     // TODO: Remove this by converting AWH into a ForceProvider
404     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingBehavior != StartingBehavior::NewSimulation,
405                                 shellfc != nullptr,
406                                 opt2fn("-awh", nfile, fnm), pull_work);
407
408     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
409     if (useReplicaExchange && MASTER(cr))
410     {
411         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir,
412                                         replExParams);
413     }
414     /* PME tuning is only supported in the Verlet scheme, with PME for
415      * Coulomb. It is not supported with only LJ PME. */
416     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) &&
417                 !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP);
418
419     pme_load_balancing_t *pme_loadbal      = nullptr;
420     if (bPMETune)
421     {
422         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
423                          *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu(),
424                          &bPMETunePrinting);
425     }
426
427     if (!ir->bContinuation)
428     {
429         if (state->flags & (1u << estV))
430         {
431             auto v = makeArrayRef(state->v);
432             /* Set the velocities of vsites, shells and frozen atoms to zero */
433             for (i = 0; i < mdatoms->homenr; i++)
434             {
435                 if (mdatoms->ptype[i] == eptVSite ||
436                     mdatoms->ptype[i] == eptShell)
437                 {
438                     clear_rvec(v[i]);
439                 }
440                 else if (mdatoms->cFREEZE)
441                 {
442                     for (m = 0; m < DIM; m++)
443                     {
444                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
445                         {
446                             v[i][m] = 0;
447                         }
448                     }
449                 }
450             }
451         }
452
453         if (constr)
454         {
455             /* Constrain the initial coordinates and velocities */
456             do_constrain_first(fplog, constr, ir, mdatoms, state);
457         }
458         if (vsite)
459         {
460             /* Construct the virtual sites for the initial configuration */
461             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr,
462                              top.idef.iparams, top.idef.il,
463                              fr->ePBC, fr->bMolPBC, cr, state->box);
464         }
465     }
466
467     if (ir->efep != efepNO)
468     {
469         /* Set free energy calculation frequency as the greatest common
470          * denominator of nstdhdl and repl_ex_nst. */
471         nstfep = ir->fepvals->nstdhdl;
472         if (ir->bExpanded)
473         {
474             nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
475         }
476         if (useReplicaExchange)
477         {
478             nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
479         }
480     }
481
482     /* Be REALLY careful about what flags you set here. You CANNOT assume
483      * this is the first step, since we might be restarting from a checkpoint,
484      * and in that case we should not do any modifications to the state.
485      */
486     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
487
488     // When restarting from a checkpoint, it can be appropriate to
489     // initialize ekind from quantities in the checkpoint. Otherwise,
490     // compute_globals must initialize ekind before the simulation
491     // starts/restarts. However, only the master rank knows what was
492     // found in the checkpoint file, so we have to communicate in
493     // order to coordinate the restart.
494     //
495     // TODO Consider removing this communication if/when checkpoint
496     // reading directly follows .tpr reading, because all ranks can
497     // agree on hasReadEkinState at that time.
498     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
499     if (PAR(cr))
500     {
501         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
502     }
503     if (hasReadEkinState)
504     {
505         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
506     }
507
508     unsigned int cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
509                                | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
510                                | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
511                                | (hasReadEkinState ? CGLO_READEKIN : 0));
512
513     bSumEkinhOld = FALSE;
514
515     t_vcm vcm(top_global->groups, *ir);
516     reportComRemovalInfo(fplog, vcm);
517
518     /* To minimize communication, compute_globals computes the COM velocity
519      * and the kinetic energy for the velocities without COM motion removed.
520      * Thus to get the kinetic energy without the COM contribution, we need
521      * to call compute_globals twice.
522      */
523     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
524     {
525         unsigned int cglo_flags_iteration = cglo_flags;
526         if (bStopCM && cgloIteration == 0)
527         {
528             cglo_flags_iteration |= CGLO_STOPCM;
529             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
530         }
531         compute_globals(gstat, cr, ir, fr, ekind,
532                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
533                         mdatoms, nrnb, &vcm,
534                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
535                         constr, &nullSignaller, state->box,
536                         &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration
537                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
538         if (cglo_flags_iteration & CGLO_STOPCM)
539         {
540             /* At initialization, do not pass x with acceleration-correction mode
541              * to avoid (incorrect) correction of the initial coordinates.
542              */
543             rvec *xPtr = nullptr;
544             if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
545             {
546                 xPtr = state->x.rvec_array();
547             }
548             process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
549             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
550         }
551     }
552     checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
553                                     top_global, &top, state->x.rvec_array(), state->box,
554                                     &shouldCheckNumberOfBondedInteractions);
555     if (ir->eI == eiVVAK)
556     {
557         /* a second call to get the half step temperature initialized as well */
558         /* we do the same call as above, but turn the pressure off -- internally to
559            compute_globals, this is recognized as a velocity verlet half-step
560            kinetic energy calculation.  This minimized excess variables, but
561            perhaps loses some logic?*/
562
563         compute_globals(gstat, cr, ir, fr, ekind,
564                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
565                         mdatoms, nrnb, &vcm,
566                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
567                         constr, &nullSignaller, state->box,
568                         nullptr, &bSumEkinhOld,
569                         cglo_flags & ~CGLO_PRESSURE);
570     }
571
572     /* Calculate the initial half step temperature, and save the ekinh_old */
573     if (startingBehavior == StartingBehavior::NewSimulation)
574     {
575         for (i = 0; (i < ir->opts.ngtc); i++)
576         {
577             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
578         }
579     }
580
581     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
582        temperature control */
583     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
584
585     if (MASTER(cr))
586     {
587         if (!ir->bContinuation)
588         {
589             if (constr && ir->eConstrAlg == econtLINCS)
590             {
591                 fprintf(fplog,
592                         "RMS relative constraint deviation after constraining: %.2e\n",
593                         constr->rmsd());
594             }
595             if (EI_STATE_VELOCITY(ir->eI))
596             {
597                 real temp = enerd->term[F_TEMP];
598                 if (ir->eI != eiVV)
599                 {
600                     /* Result of Ekin averaged over velocities of -half
601                      * and +half step, while we only have -half step here.
602                      */
603                     temp *= 2;
604                 }
605                 fprintf(fplog, "Initial temperature: %g K\n", temp);
606             }
607         }
608
609         char tbuf[20];
610         fprintf(stderr, "starting mdrun '%s'\n",
611                 *(top_global->name));
612         if (ir->nsteps >= 0)
613         {
614             sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
615         }
616         else
617         {
618             sprintf(tbuf, "%s", "infinite");
619         }
620         if (ir->init_step > 0)
621         {
622             fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
623                     gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
624                     gmx_step_str(ir->init_step, sbuf2),
625                     ir->init_step*ir->delta_t);
626         }
627         else
628         {
629             fprintf(stderr, "%s steps, %s ps.\n",
630                     gmx_step_str(ir->nsteps, sbuf), tbuf);
631         }
632         fprintf(fplog, "\n");
633     }
634
635     walltime_accounting_start_time(walltime_accounting);
636     wallcycle_start(wcycle, ewcRUN);
637     print_start(fplog, cr, walltime_accounting, "mdrun");
638
639 #if GMX_FAHCORE
640     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
641     int chkpt_ret = fcCheckPointParallel( cr->nodeid,
642                                           NULL, 0);
643     if (chkpt_ret == 0)
644     {
645         gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
646     }
647 #endif
648
649     /***********************************************************
650      *
651      *             Loop over MD steps
652      *
653      ************************************************************/
654
655     bFirstStep       = TRUE;
656     /* Skip the first Nose-Hoover integration when we get the state from tpx */
657     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
658     bSumEkinhOld     = FALSE;
659     bExchanged       = FALSE;
660     bNeedRepartition = FALSE;
661
662     bool simulationsShareState = false;
663     int  nstSignalComm         = nstglobalcomm;
664     {
665         // TODO This implementation of ensemble orientation restraints is nasty because
666         // a user can't just do multi-sim with single-sim orientation restraints.
667         bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
668         bool awhUsesMultiSim         = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
669
670         // Replica exchange, ensemble restraints and AWH need all
671         // simulations to remain synchronized, so they need
672         // checkpoints and stop conditions to act on the same step, so
673         // the propagation of such signals must take place between
674         // simulations, not just within simulations.
675         // TODO: Make algorithm initializers set these flags.
676         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
677
678         if (simulationsShareState)
679         {
680             // Inter-simulation signal communication does not need to happen
681             // often, so we use a minimum of 200 steps to reduce overhead.
682             const int c_minimumInterSimulationSignallingInterval = 200;
683             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm;
684         }
685     }
686
687     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
688                 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
689                 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
690                 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
691
692     auto checkpointHandler = std::make_unique<CheckpointHandler>(
693                 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
694                 simulationsShareState, ir->nstlist == 0, MASTER(cr),
695                 mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period);
696
697     const bool resetCountersIsLocal = true;
698     auto       resetHandler         = std::make_unique<ResetHandler>(
699                 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal,
700                 ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
701                 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
702
703     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
704
705     step     = ir->init_step;
706     step_rel = 0;
707
708     // TODO extract this to new multi-simulation module
709     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
710     {
711         if (!multisim_int_all_are_equal(ms, ir->nsteps))
712         {
713             GMX_LOG(mdlog.warning).appendText(
714                     "Note: The number of steps is not consistent across multi simulations,\n"
715                     "but we are proceeding anyway!");
716         }
717         if (!multisim_int_all_are_equal(ms, ir->init_step))
718         {
719             GMX_LOG(mdlog.warning).appendText(
720                     "Note: The initial step is not consistent across multi simulations,\n"
721                     "but we are proceeding anyway!");
722         }
723     }
724
725     /* and stop now if we should */
726     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
727     while (!bLastStep)
728     {
729
730         /* Determine if this is a neighbor search step */
731         bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
732
733         if (bPMETune && bNStList)
734         {
735             /* PME grid + cut-off optimization with GPUs or PME nodes */
736             pme_loadbal_do(pme_loadbal, cr,
737                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
738                            fplog, mdlog,
739                            *ir, fr, *state,
740                            wcycle,
741                            step, step_rel,
742                            &bPMETunePrinting);
743         }
744
745         wallcycle_start(wcycle, ewcSTEP);
746
747         bLastStep = (step_rel == ir->nsteps);
748         t         = t0 + step*ir->delta_t;
749
750         // TODO Refactor this, so that nstfep does not need a default value of zero
751         if (ir->efep != efepNO || ir->bSimTemp)
752         {
753             /* find and set the current lambdas */
754             setCurrentLambdasLocal(step, ir->fepvals, lam0, state);
755
756             bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
757             bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
758             bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
759                             && (ir->bExpanded) && (step > 0) &&
760                             (startingBehavior == StartingBehavior::NewSimulation));
761         }
762
763         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep &&
764                      do_per_step(step, replExParams.exchangeInterval));
765
766         if (doSimulatedAnnealing)
767         {
768             update_annealing_target_temp(ir, t, &upd);
769         }
770
771         /* Stop Center of Mass motion */
772         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
773
774         /* Determine whether or not to do Neighbour Searching */
775         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
776
777         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
778
779         /* do_log triggers energy and virial calculation. Because this leads
780          * to different code paths, forces can be different. Thus for exact
781          * continuation we should avoid extra log output.
782          * Note that the || bLastStep can result in non-exact continuation
783          * beyond the last step. But we don't consider that to be an issue.
784          */
785         do_log     =
786             (do_per_step(step, ir->nstlog) ||
787              (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) ||
788              bLastStep);
789         do_verbose = mdrunOptions.verbose &&
790             (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
791
792         if (bNS && !(bFirstStep && ir->bContinuation))
793         {
794             bMasterState = FALSE;
795             /* Correct the new box if it is too skewed */
796             if (inputrecDynamicBox(ir))
797             {
798                 if (correct_box(fplog, step, state->box, graph))
799                 {
800                     bMasterState = TRUE;
801                 }
802             }
803             if (DOMAINDECOMP(cr) && bMasterState)
804             {
805                 dd_collect_state(cr->dd, state, state_global);
806             }
807
808             if (DOMAINDECOMP(cr))
809             {
810                 /* Repartition the domain decomposition */
811                 dd_partition_system(fplog, mdlog, step, cr,
812                                     bMasterState, nstglobalcomm,
813                                     state_global, *top_global, ir, imdSession,
814                                     pull_work,
815                                     state, &f, mdAtoms, &top, fr,
816                                     vsite, constr,
817                                     nrnb, wcycle,
818                                     do_verbose && !bPMETunePrinting);
819                 shouldCheckNumberOfBondedInteractions = true;
820                 upd.setNumAtoms(state->natoms);
821             }
822         }
823
824         if (MASTER(cr) && do_log)
825         {
826             energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
827         }
828
829         if (ir->efep != efepNO)
830         {
831             update_mdatoms(mdatoms, state->lambda[efptMASS]);
832         }
833
834         if (bExchanged)
835         {
836
837             /* We need the kinetic energy at minus the half step for determining
838              * the full step kinetic energy and possibly for T-coupling.*/
839             /* This may not be quite working correctly yet . . . . */
840             compute_globals(gstat, cr, ir, fr, ekind,
841                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
842                             mdatoms, nrnb, &vcm,
843                             wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
844                             constr, &nullSignaller, state->box,
845                             &totalNumberOfBondedInteractions, &bSumEkinhOld,
846                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
847             checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
848                                             top_global, &top, state->x.rvec_array(), state->box,
849                                             &shouldCheckNumberOfBondedInteractions);
850         }
851         clear_mat(force_vir);
852
853         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
854
855         /* Determine the energy and pressure:
856          * at nstcalcenergy steps and at energy output steps (set below).
857          */
858         if (EI_VV(ir->eI) && (!bInitStep))
859         {
860             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
861             bCalcVir      = bCalcEnerStep ||
862                 (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
863         }
864         else
865         {
866             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
867             bCalcVir      = bCalcEnerStep ||
868                 (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
869         }
870         bCalcEner = bCalcEnerStep;
871
872         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
873
874         if (do_ene || do_log || bDoReplEx)
875         {
876             bCalcVir  = TRUE;
877             bCalcEner = TRUE;
878         }
879
880         /* Do we need global communication ? */
881         bGStat = (bCalcVir || bCalcEner || bStopCM ||
882                   do_per_step(step, nstglobalcomm) ||
883                   (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
884
885         force_flags = (GMX_FORCE_STATECHANGED |
886                        ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) |
887                        GMX_FORCE_ALLFORCES |
888                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
889                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
890                        (bDoFEP ? GMX_FORCE_DHDL : 0)
891                        );
892
893         if (shellfc)
894         {
895             /* Now is the time to relax the shells */
896             relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose,
897                                 enforcedRotation, step,
898                                 ir, imdSession, pull_work, bNS, force_flags, &top,
899                                 constr, enerd, fcd,
900                                 state->natoms,
901                                 state->x.arrayRefWithPadding(),
902                                 state->v.arrayRefWithPadding(),
903                                 state->box,
904                                 state->lambda,
905                                 &state->hist,
906                                 f.arrayRefWithPadding(), force_vir, mdatoms,
907                                 nrnb, wcycle, graph,
908                                 shellfc, fr, ppForceWorkload, t, mu_tot,
909                                 vsite,
910                                 ddBalanceRegionHandler);
911         }
912         else
913         {
914             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated
915                (or the AWH update will be performed twice for one step when continuing). It would be best to
916                call this update function from do_md_trajectory_writing but that would occur after do_force.
917                One would have to divide the update_awh function into one function applying the AWH force
918                and one doing the AWH bias update. The update AWH bias function could then be called after
919                do_md_trajectory_writing (then containing update_awh_history).
920                The checkpointing will in the future probably moved to the start of the md loop which will
921                rid of this issue. */
922             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
923             {
924                 awh->updateHistory(state_global->awhHistory.get());
925             }
926
927             /* The coordinates (x) are shifted (to get whole molecules)
928              * in do_force.
929              * This is parallellized as well, and does communication too.
930              * Check comments in sim_util.c
931              */
932             do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession,
933                      pull_work,
934                      step, nrnb, wcycle, &top,
935                      state->box, state->x.arrayRefWithPadding(), &state->hist,
936                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
937                      state->lambda, graph,
938                      fr, ppForceWorkload, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
939                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
940                      ddBalanceRegionHandler);
941         }
942
943         // VV integrators do not need the following velocity half step
944         // if it is the first step after starting from a checkpoint.
945         // That is, the half step is needed on all other steps, and
946         // also the first step when starting from a .tpr file.
947         if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
948         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
949         {
950             rvec *vbuf = nullptr;
951
952             wallcycle_start(wcycle, ewcUPDATE);
953             if (ir->eI == eiVV && bInitStep)
954             {
955                 /* if using velocity verlet with full time step Ekin,
956                  * take the first half step only to compute the
957                  * virial for the first step. From there,
958                  * revert back to the initial coordinates
959                  * so that the input is actually the initial step.
960                  */
961                 snew(vbuf, state->natoms);
962                 copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */
963             }
964             else
965             {
966                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
967                 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
968             }
969
970             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
971                           ekind, M, &upd, etrtVELOCITY1,
972                           cr, constr);
973
974             wallcycle_stop(wcycle, ewcUPDATE);
975             constrain_velocities(step, nullptr,
976                                  state,
977                                  shake_vir,
978                                  constr,
979                                  bCalcVir, do_log, do_ene);
980             wallcycle_start(wcycle, ewcUPDATE);
981             /* if VV, compute the pressure and constraints */
982             /* For VV2, we strictly only need this if using pressure
983              * control, but we really would like to have accurate pressures
984              * printed out.
985              * Think about ways around this in the future?
986              * For now, keep this choice in comments.
987              */
988             /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
989             /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
990             bPres = TRUE;
991             bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
992             if (bCalcEner && ir->eI == eiVVAK)
993             {
994                 bSumEkinhOld = TRUE;
995             }
996             /* for vv, the first half of the integration actually corresponds to the previous step.
997                So we need information from the last step in the first half of the integration */
998             if (bGStat || do_per_step(step-1, nstglobalcomm))
999             {
1000                 wallcycle_stop(wcycle, ewcUPDATE);
1001                 compute_globals(gstat, cr, ir, fr, ekind,
1002                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1003                                 mdatoms, nrnb, &vcm,
1004                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1005                                 constr, &nullSignaller, state->box,
1006                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1007                                 (bGStat ? CGLO_GSTAT : 0)
1008                                 | (bCalcEner ? CGLO_ENERGY : 0)
1009                                 | (bTemp ? CGLO_TEMPERATURE : 0)
1010                                 | (bPres ? CGLO_PRESSURE : 0)
1011                                 | (bPres ? CGLO_CONSTRAINT : 0)
1012                                 | (bStopCM ? CGLO_STOPCM : 0)
1013                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1014                                 | CGLO_SCALEEKIN
1015                                 );
1016                 /* explanation of above:
1017                    a) We compute Ekin at the full time step
1018                    if 1) we are using the AveVel Ekin, and it's not the
1019                    initial step, or 2) if we are using AveEkin, but need the full
1020                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1021                    b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
1022                    EkinAveVel because it's needed for the pressure */
1023                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1024                                                 top_global, &top, state->x.rvec_array(), state->box,
1025                                                 &shouldCheckNumberOfBondedInteractions);
1026                 if (bStopCM)
1027                 {
1028                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1029                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1030                 }
1031                 wallcycle_start(wcycle, ewcUPDATE);
1032             }
1033             /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1034             if (!bInitStep)
1035             {
1036                 if (bTrotter)
1037                 {
1038                     m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
1039                     trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
1040
1041                     /* TODO This is only needed when we're about to write
1042                      * a checkpoint, because we use it after the restart
1043                      * (in a kludge?). But what should we be doing if
1044                      * the startingBehavior is NewSimulation or bInitStep are true? */
1045                     if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1046                     {
1047                         copy_mat(shake_vir, state->svir_prev);
1048                         copy_mat(force_vir, state->fvir_prev);
1049                     }
1050                     if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1051                     {
1052                         /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1053                         enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1054                         enerd->term[F_EKIN] = trace(ekind->ekin);
1055                     }
1056                 }
1057                 else if (bExchanged)
1058                 {
1059                     wallcycle_stop(wcycle, ewcUPDATE);
1060                     /* We need the kinetic energy at minus the half step for determining
1061                      * the full step kinetic energy and possibly for T-coupling.*/
1062                     /* This may not be quite working correctly yet . . . . */
1063                     compute_globals(gstat, cr, ir, fr, ekind,
1064                                     state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1065                                     mdatoms, nrnb, &vcm,
1066                                     wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
1067                                     constr, &nullSignaller, state->box,
1068                                     nullptr, &bSumEkinhOld,
1069                                     CGLO_GSTAT | CGLO_TEMPERATURE);
1070                     wallcycle_start(wcycle, ewcUPDATE);
1071                 }
1072             }
1073             /* if it's the initial step, we performed this first step just to get the constraint virial */
1074             if (ir->eI == eiVV && bInitStep)
1075             {
1076                 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1077                 sfree(vbuf);
1078             }
1079             wallcycle_stop(wcycle, ewcUPDATE);
1080         }
1081
1082         /* compute the conserved quantity */
1083         if (EI_VV(ir->eI))
1084         {
1085             saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1086             if (ir->eI == eiVV)
1087             {
1088                 last_ekin = enerd->term[F_EKIN];
1089             }
1090             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1091             {
1092                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1093             }
1094             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1095             if (ir->efep != efepNO)
1096             {
1097                 sum_dhdl(enerd, state->lambda, ir->fepvals);
1098             }
1099         }
1100
1101         /* ########  END FIRST UPDATE STEP  ############## */
1102         /* ########  If doing VV, we now have v(dt) ###### */
1103         if (bDoExpanded)
1104         {
1105             /* perform extended ensemble sampling in lambda - we don't
1106                actually move to the new state before outputting
1107                statistics, but if performing simulated tempering, we
1108                do update the velocities and the tau_t. */
1109
1110             lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms);
1111             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1112             if (MASTER(cr))
1113             {
1114                 copy_df_history(state_global->dfhist, state->dfhist);
1115             }
1116         }
1117
1118         /* Now we have the energies and forces corresponding to the
1119          * coordinates at time t. We must output all of this before
1120          * the update.
1121          */
1122         do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
1123                                  ir, state, state_global, observablesHistory,
1124                                  top_global, fr,
1125                                  outf, energyOutput, ekind, f,
1126                                  checkpointHandler->isCheckpointingStep(),
1127                                  bRerunMD, bLastStep,
1128                                  mdrunOptions.writeConfout,
1129                                  bSumEkinhOld);
1130         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1131         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1132
1133         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1134         if (startingBehavior != StartingBehavior::NewSimulation &&
1135             (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1136         {
1137             copy_mat(state->svir_prev, shake_vir);
1138             copy_mat(state->fvir_prev, force_vir);
1139         }
1140
1141         stopHandler->setSignal();
1142         resetHandler->setSignal(walltime_accounting);
1143
1144         if (bGStat || !PAR(cr))
1145         {
1146             /* In parallel we only have to check for checkpointing in steps
1147              * where we do global communication,
1148              *  otherwise the other nodes don't know.
1149              */
1150             checkpointHandler->setSignal(walltime_accounting);
1151         }
1152
1153         /* #########   START SECOND UPDATE STEP ################# */
1154
1155         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
1156            in preprocessing */
1157
1158         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1159         {
1160             gmx_bool bIfRandomize;
1161             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1162             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1163             if (constr && bIfRandomize)
1164             {
1165                 constrain_velocities(step, nullptr,
1166                                      state,
1167                                      tmp_vir,
1168                                      constr,
1169                                      bCalcVir, do_log, do_ene);
1170             }
1171         }
1172         /* Box is changed in update() when we do pressure coupling,
1173          * but we should still use the old box for energy corrections and when
1174          * writing it to the energy file, so it matches the trajectory files for
1175          * the same timestep above. Make a copy in a separate array.
1176          */
1177         copy_mat(state->box, lastbox);
1178
1179         dvdl_constr = 0;
1180
1181         wallcycle_start(wcycle, ewcUPDATE);
1182         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1183         if (bTrotter)
1184         {
1185             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1186             /* We can only do Berendsen coupling after we have summed
1187              * the kinetic energy or virial. Since the happens
1188              * in global_state after update, we should only do it at
1189              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1190              */
1191         }
1192         else
1193         {
1194             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1195             update_pcouple_before_coordinates(fplog, step, ir, state,
1196                                               parrinellorahmanMu, M,
1197                                               bInitStep);
1198         }
1199
1200         if (EI_VV(ir->eI))
1201         {
1202             /* velocity half-step update */
1203             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1204                           ekind, M, &upd, etrtVELOCITY2,
1205                           cr, constr);
1206         }
1207
1208         /* Above, initialize just copies ekinh into ekin,
1209          * it doesn't copy position (for VV),
1210          * and entire integrator for MD.
1211          */
1212
1213         if (ir->eI == eiVVAK)
1214         {
1215             cbuf.resize(state->x.size());
1216             std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1217         }
1218
1219         if (c_useGpuUpdateConstrain)
1220         {
1221             if (bNS)
1222             {
1223                 integrator->set(top.idef, *mdatoms);
1224                 t_pbc pbc;
1225                 set_pbc(&pbc, epbcXYZ, state->box);
1226                 integrator->setPbc(&pbc);
1227             }
1228             integrator->copyCoordinatesToGpu(state->x.rvec_array());
1229             integrator->copyVelocitiesToGpu(state->v.rvec_array());
1230             integrator->copyForcesToGpu(as_rvec_array(f.data()));
1231
1232             // This applies Leap-Frog, LINCS and SETTLE in a succession
1233             integrator->integrate(ir->delta_t, true, bCalcVir, shake_vir);
1234
1235             integrator->copyCoordinatesFromGpu(state->x.rvec_array());
1236             integrator->copyVelocitiesFromGpu(state->v.rvec_array());
1237         }
1238         else
1239         {
1240             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1241                           ekind, M, &upd, etrtPOSITION, cr, constr);
1242
1243             wallcycle_stop(wcycle, ewcUPDATE);
1244
1245             constrain_coordinates(step, &dvdl_constr, state,
1246                                   shake_vir,
1247                                   &upd, constr,
1248                                   bCalcVir, do_log, do_ene);
1249
1250             update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
1251                                   cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
1252             finish_update(ir, mdatoms,
1253                           state, graph,
1254                           nrnb, wcycle, &upd, constr);
1255         }
1256
1257         if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1258         {
1259             updatePrevStepPullCom(pull_work, state);
1260         }
1261
1262         if (ir->eI == eiVVAK)
1263         {
1264             /* erase F_EKIN and F_TEMP here? */
1265             /* just compute the kinetic energy at the half step to perform a trotter step */
1266             compute_globals(gstat, cr, ir, fr, ekind,
1267                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1268                             mdatoms, nrnb, &vcm,
1269                             wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1270                             constr, &nullSignaller, lastbox,
1271                             nullptr, &bSumEkinhOld,
1272                             (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
1273                             );
1274             wallcycle_start(wcycle, ewcUPDATE);
1275             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1276             /* now we know the scaling, we can compute the positions again */
1277             std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1278
1279             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1280                           ekind, M, &upd, etrtPOSITION, cr, constr);
1281             wallcycle_stop(wcycle, ewcUPDATE);
1282
1283             /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1284             /* are the small terms in the shake_vir here due
1285              * to numerical errors, or are they important
1286              * physically? I'm thinking they are just errors, but not completely sure.
1287              * For now, will call without actually constraining, constr=NULL*/
1288             finish_update(ir, mdatoms,
1289                           state, graph,
1290                           nrnb, wcycle, &upd, nullptr);
1291         }
1292         if (EI_VV(ir->eI))
1293         {
1294             /* this factor or 2 correction is necessary
1295                because half of the constraint force is removed
1296                in the vv step, so we have to double it.  See
1297                the Redmine issue #1255.  It is not yet clear
1298                if the factor of 2 is exact, or just a very
1299                good approximation, and this will be
1300                investigated.  The next step is to see if this
1301                can be done adding a dhdl contribution from the
1302                rattle step, but this is somewhat more
1303                complicated with the current code. Will be
1304                investigated, hopefully for 4.6.3. However,
1305                this current solution is much better than
1306                having it completely wrong.
1307              */
1308             enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
1309         }
1310         else
1311         {
1312             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1313         }
1314
1315         if (vsite != nullptr)
1316         {
1317             wallcycle_start(wcycle, ewcVSITECONSTR);
1318             if (graph != nullptr)
1319             {
1320                 shift_self(graph, state->box, state->x.rvec_array());
1321             }
1322             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1323                              top.idef.iparams, top.idef.il,
1324                              fr->ePBC, fr->bMolPBC, cr, state->box);
1325
1326             if (graph != nullptr)
1327             {
1328                 unshift_self(graph, state->box, state->x.rvec_array());
1329             }
1330             wallcycle_stop(wcycle, ewcVSITECONSTR);
1331         }
1332
1333         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1334         /* With Leap-Frog we can skip compute_globals at
1335          * non-communication steps, but we need to calculate
1336          * the kinetic energy one step before communication.
1337          */
1338         {
1339             // Organize to do inter-simulation signalling on steps if
1340             // and when algorithms require it.
1341             bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1342
1343             if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
1344             {
1345                 // Since we're already communicating at this step, we
1346                 // can propagate intra-simulation signals. Note that
1347                 // check_nstglobalcomm has the responsibility for
1348                 // choosing the value of nstglobalcomm that is one way
1349                 // bGStat becomes true, so we can't get into a
1350                 // situation where e.g. checkpointing can't be
1351                 // signalled.
1352                 bool                doIntraSimSignal = true;
1353                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1354
1355                 compute_globals(gstat, cr, ir, fr, ekind,
1356                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1357                                 mdatoms, nrnb, &vcm,
1358                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1359                                 constr, &signaller,
1360                                 lastbox,
1361                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1362                                 (bGStat ? CGLO_GSTAT : 0)
1363                                 | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1364                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1365                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1366                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
1367                                 | CGLO_CONSTRAINT
1368                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1369                                 );
1370                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1371                                                 top_global, &top, state->x.rvec_array(), state->box,
1372                                                 &shouldCheckNumberOfBondedInteractions);
1373                 if (!EI_VV(ir->eI) && bStopCM)
1374                 {
1375                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1376                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1377                 }
1378             }
1379         }
1380
1381         /* #############  END CALC EKIN AND PRESSURE ################# */
1382
1383         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1384            the virial that should probably be addressed eventually. state->veta has better properies,
1385            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1386            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1387
1388         if (ir->efep != efepNO && !EI_VV(ir->eI))
1389         {
1390             /* Sum up the foreign energy and dhdl terms for md and sd.
1391                Currently done every step so that dhdl is correct in the .edr */
1392             sum_dhdl(enerd, state->lambda, ir->fepvals);
1393         }
1394
1395         update_pcouple_after_coordinates(fplog, step, ir, mdatoms,
1396                                          pres, force_vir, shake_vir,
1397                                          parrinellorahmanMu,
1398                                          state, nrnb, &upd);
1399
1400         /* ################# END UPDATE STEP 2 ################# */
1401         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1402
1403         /* The coordinates (x) were unshifted in update */
1404         if (!bGStat)
1405         {
1406             /* We will not sum ekinh_old,
1407              * so signal that we still have to do it.
1408              */
1409             bSumEkinhOld = TRUE;
1410         }
1411
1412         if (bCalcEner)
1413         {
1414             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1415
1416             /* use the directly determined last velocity, not actually the averaged half steps */
1417             if (bTrotter && ir->eI == eiVV)
1418             {
1419                 enerd->term[F_EKIN] = last_ekin;
1420             }
1421             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1422
1423             if (integratorHasConservedEnergyQuantity(ir))
1424             {
1425                 if (EI_VV(ir->eI))
1426                 {
1427                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1428                 }
1429                 else
1430                 {
1431                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1432                 }
1433             }
1434             /* #########  END PREPARING EDR OUTPUT  ###########  */
1435         }
1436
1437         /* Output stuff */
1438         if (MASTER(cr))
1439         {
1440             if (fplog && do_log && bDoExpanded)
1441             {
1442                 /* only needed if doing expanded ensemble */
1443                 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr,
1444                                           state_global->dfhist, state->fep_state, ir->nstlog, step);
1445             }
1446             if (bCalcEner)
1447             {
1448                 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep,
1449                                                  t, mdatoms->tmass, enerd, state,
1450                                                  ir->fepvals, ir->expandedvals, lastbox,
1451                                                  shake_vir, force_vir, total_vir, pres,
1452                                                  ekind, mu_tot, constr);
1453             }
1454             else
1455             {
1456                 energyOutput.recordNonEnergyStep();
1457             }
1458
1459             gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
1460             gmx_bool do_or  = do_per_step(step, ir->nstorireout);
1461
1462             energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1463             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1464                                                do_log ? fplog : nullptr,
1465                                                step, t,
1466                                                fcd, awh.get());
1467
1468             if (ir->bPull)
1469             {
1470                 pull_print_output(pull_work, step, t);
1471             }
1472
1473             if (do_per_step(step, ir->nstlog))
1474             {
1475                 if (fflush(fplog) != 0)
1476                 {
1477                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1478                 }
1479             }
1480         }
1481         if (bDoExpanded)
1482         {
1483             /* Have to do this part _after_ outputting the logfile and the edr file */
1484             /* Gets written into the state at the beginning of next loop*/
1485             state->fep_state = lamnew;
1486         }
1487         /* Print the remaining wall clock time for the run */
1488         if (isMasterSimMasterRank(ms, MASTER(cr)) &&
1489             (do_verbose || gmx_got_usr_signal()) &&
1490             !bPMETunePrinting)
1491         {
1492             if (shellfc)
1493             {
1494                 fprintf(stderr, "\n");
1495             }
1496             print_time(stderr, walltime_accounting, step, ir, cr);
1497         }
1498
1499         /* Ion/water position swapping.
1500          * Not done in last step since trajectory writing happens before this call
1501          * in the MD loop and exchanges would be lost anyway. */
1502         bNeedRepartition = FALSE;
1503         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
1504             do_per_step(step, ir->swap->nstswap))
1505         {
1506             bNeedRepartition = do_swapcoords(cr, step, t, ir, swap, wcycle,
1507                                              as_rvec_array(state->x.data()),
1508                                              state->box,
1509                                              MASTER(cr) && mdrunOptions.verbose,
1510                                              bRerunMD);
1511
1512             if (bNeedRepartition && DOMAINDECOMP(cr))
1513             {
1514                 dd_collect_state(cr->dd, state, state_global);
1515             }
1516         }
1517
1518         /* Replica exchange */
1519         bExchanged = FALSE;
1520         if (bDoReplEx)
1521         {
1522             bExchanged = replica_exchange(fplog, cr, ms, repl_ex,
1523                                           state_global, enerd,
1524                                           state, step, t);
1525         }
1526
1527         if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
1528         {
1529             dd_partition_system(fplog, mdlog, step, cr, TRUE, 1,
1530                                 state_global, *top_global, ir, imdSession,
1531                                 pull_work,
1532                                 state, &f, mdAtoms, &top, fr,
1533                                 vsite, constr,
1534                                 nrnb, wcycle, FALSE);
1535             shouldCheckNumberOfBondedInteractions = true;
1536             upd.setNumAtoms(state->natoms);
1537         }
1538
1539         bFirstStep             = FALSE;
1540         bInitStep              = FALSE;
1541
1542         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1543         /* With all integrators, except VV, we need to retain the pressure
1544          * at the current step for coupling at the next step.
1545          */
1546         if ((state->flags & (1u<<estPRES_PREV)) &&
1547             (bGStatEveryStep ||
1548              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1549         {
1550             /* Store the pressure in t_state for pressure coupling
1551              * at the next MD step.
1552              */
1553             copy_mat(pres, state->pres_prev);
1554         }
1555
1556         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1557
1558         if ( (membed != nullptr) && (!bLastStep) )
1559         {
1560             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1561         }
1562
1563         cycles = wallcycle_stop(wcycle, ewcSTEP);
1564         if (DOMAINDECOMP(cr) && wcycle)
1565         {
1566             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1567         }
1568
1569         /* increase the MD step number */
1570         step++;
1571         step_rel++;
1572
1573         resetHandler->resetCounters(
1574                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(),
1575                 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1576
1577         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1578         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1579
1580     }
1581     /* End of main MD loop */
1582
1583     /* Closing TNG files can include compressing data. Therefore it is good to do that
1584      * before stopping the time measurements. */
1585     mdoutf_tng_close(outf);
1586
1587     /* Stop measuring walltime */
1588     walltime_accounting_end_time(walltime_accounting);
1589
1590     if (!thisRankHasDuty(cr, DUTY_PME))
1591     {
1592         /* Tell the PME only node to finish */
1593         gmx_pme_send_finish(cr);
1594     }
1595
1596     if (MASTER(cr))
1597     {
1598         if (ir->nstcalcenergy > 0)
1599         {
1600             energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1601             energyOutput.printAverages(fplog, groups);
1602         }
1603     }
1604     done_mdoutf(outf);
1605
1606     if (bPMETune)
1607     {
1608         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1609     }
1610
1611     done_shellfc(fplog, shellfc, step_rel);
1612
1613     if (useReplicaExchange && MASTER(cr))
1614     {
1615         print_replica_exchange_statistics(fplog, repl_ex);
1616     }
1617
1618     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1619
1620     global_stat_destroy(gstat);
1621
1622 }