9e33cc02ddf7b68e8005185f3e94344f863a7738
[alexxy/gromacs.git] / src / gromacs / mdrun / md.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the integrator for normal molecular dynamics simulations
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53
54 #include "gromacs/awh/awh.h"
55 #include "gromacs/commandline/filenm.h"
56 #include "gromacs/domdec/collect.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec.h"
59 #include "gromacs/domdec/domdec_network.h"
60 #include "gromacs/domdec/domdec_struct.h"
61 #include "gromacs/domdec/mdsetup.h"
62 #include "gromacs/domdec/partition.h"
63 #include "gromacs/essentialdynamics/edsam.h"
64 #include "gromacs/ewald/pme.h"
65 #include "gromacs/ewald/pme_load_balancing.h"
66 #include "gromacs/fileio/trxio.h"
67 #include "gromacs/gmxlib/network.h"
68 #include "gromacs/gmxlib/nrnb.h"
69 #include "gromacs/gpu_utils/gpu_utils.h"
70 #include "gromacs/imd/imd.h"
71 #include "gromacs/listed_forces/manage_threading.h"
72 #include "gromacs/math/functions.h"
73 #include "gromacs/math/utilities.h"
74 #include "gromacs/math/vec.h"
75 #include "gromacs/math/vectypes.h"
76 #include "gromacs/mdlib/checkpointhandler.h"
77 #include "gromacs/mdlib/compute_io.h"
78 #include "gromacs/mdlib/constr.h"
79 #include "gromacs/mdlib/ebin.h"
80 #include "gromacs/mdlib/enerdata_utils.h"
81 #include "gromacs/mdlib/energyoutput.h"
82 #include "gromacs/mdlib/expanded.h"
83 #include "gromacs/mdlib/force.h"
84 #include "gromacs/mdlib/force_flags.h"
85 #include "gromacs/mdlib/forcerec.h"
86 #include "gromacs/mdlib/md_support.h"
87 #include "gromacs/mdlib/mdatoms.h"
88 #include "gromacs/mdlib/mdoutf.h"
89 #include "gromacs/mdlib/membed.h"
90 #include "gromacs/mdlib/resethandler.h"
91 #include "gromacs/mdlib/sighandler.h"
92 #include "gromacs/mdlib/simulationsignal.h"
93 #include "gromacs/mdlib/stat.h"
94 #include "gromacs/mdlib/stophandler.h"
95 #include "gromacs/mdlib/tgroup.h"
96 #include "gromacs/mdlib/trajectory_writing.h"
97 #include "gromacs/mdlib/update.h"
98 #include "gromacs/mdlib/update_constrain_cuda.h"
99 #include "gromacs/mdlib/vcm.h"
100 #include "gromacs/mdlib/vsite.h"
101 #include "gromacs/mdrunutility/handlerestart.h"
102 #include "gromacs/mdrunutility/multisim.h"
103 #include "gromacs/mdrunutility/printtime.h"
104 #include "gromacs/mdtypes/awh_history.h"
105 #include "gromacs/mdtypes/awh_params.h"
106 #include "gromacs/mdtypes/commrec.h"
107 #include "gromacs/mdtypes/df_history.h"
108 #include "gromacs/mdtypes/energyhistory.h"
109 #include "gromacs/mdtypes/fcdata.h"
110 #include "gromacs/mdtypes/forcerec.h"
111 #include "gromacs/mdtypes/group.h"
112 #include "gromacs/mdtypes/inputrec.h"
113 #include "gromacs/mdtypes/interaction_const.h"
114 #include "gromacs/mdtypes/md_enums.h"
115 #include "gromacs/mdtypes/mdatom.h"
116 #include "gromacs/mdtypes/mdrunoptions.h"
117 #include "gromacs/mdtypes/observableshistory.h"
118 #include "gromacs/mdtypes/pullhistory.h"
119 #include "gromacs/mdtypes/state.h"
120 #include "gromacs/nbnxm/nbnxm.h"
121 #include "gromacs/pbcutil/mshift.h"
122 #include "gromacs/pbcutil/pbc.h"
123 #include "gromacs/pulling/output.h"
124 #include "gromacs/pulling/pull.h"
125 #include "gromacs/swap/swapcoords.h"
126 #include "gromacs/timing/wallcycle.h"
127 #include "gromacs/timing/walltime_accounting.h"
128 #include "gromacs/topology/atoms.h"
129 #include "gromacs/topology/idef.h"
130 #include "gromacs/topology/mtop_util.h"
131 #include "gromacs/topology/topology.h"
132 #include "gromacs/trajectory/trajectoryframe.h"
133 #include "gromacs/utility/basedefinitions.h"
134 #include "gromacs/utility/cstringutil.h"
135 #include "gromacs/utility/fatalerror.h"
136 #include "gromacs/utility/logger.h"
137 #include "gromacs/utility/real.h"
138 #include "gromacs/utility/smalloc.h"
139
140 #include "legacysimulator.h"
141 #include "replicaexchange.h"
142 #include "shellfc.h"
143
144 #if GMX_FAHCORE
145 #include "corewrap.h"
146 #endif
147
148 using gmx::SimulationSignaller;
149
150 //! Whether the GPU versions of Leap-Frog integrator and LINCS and SETTLE constraints
151 static const bool c_useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
152
153 void gmx::LegacySimulator::do_md()
154 {
155     // TODO Historically, the EM and MD "integrators" used different
156     // names for the t_inputrec *parameter, but these must have the
157     // same name, now that it's a member of a struct. We use this ir
158     // alias to avoid a large ripple of nearly useless changes.
159     // t_inputrec is being replaced by IMdpOptionsProvider, so this
160     // will go away eventually.
161     t_inputrec             *ir   = inputrec;
162     int64_t                 step, step_rel;
163     double                  t, t0 = ir->init_t, lam0[efptNR];
164     gmx_bool                bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
165     gmx_bool                bNS, bNStList, bStopCM,
166                             bFirstStep, bInitStep, bLastStep = FALSE;
167     gmx_bool                bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
168     gmx_bool                do_ene, do_log, do_verbose;
169     gmx_bool                bMasterState;
170     unsigned int            force_flags;
171     tensor                  force_vir = {{0}}, shake_vir = {{0}}, total_vir = {{0}},
172                             tmp_vir   = {{0}}, pres = {{0}};
173     int                     i, m;
174     rvec                    mu_tot;
175     matrix                  parrinellorahmanMu, M;
176     gmx_repl_ex_t           repl_ex = nullptr;
177     gmx_localtop_t          top;
178     PaddedVector<gmx::RVec> f {};
179     gmx_global_stat_t       gstat;
180     t_graph                *graph = nullptr;
181     gmx_shellfc_t          *shellfc;
182     gmx_bool                bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
183     gmx_bool                bTemp, bPres, bTrotter;
184     real                    dvdl_constr;
185     std::vector<RVec>       cbuf;
186     matrix                  lastbox;
187     int                     lamnew  = 0;
188     /* for FEP */
189     int                     nstfep = 0;
190     double                  cycles;
191     real                    saved_conserved_quantity = 0;
192     real                    last_ekin                = 0;
193     t_extmass               MassQ;
194     char                    sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
195
196     /* PME load balancing data for GPU kernels */
197     gmx_bool              bPMETune         = FALSE;
198     gmx_bool              bPMETunePrinting = FALSE;
199
200     bool                  bInteractiveMDstep = false;
201
202     /* Domain decomposition could incorrectly miss a bonded
203        interaction, but checking for that requires a global
204        communication stage, which does not otherwise happen in DD
205        code. So we do that alongside the first global energy reduction
206        after a new DD is made. These variables handle whether the
207        check happens, and the result it returns. */
208     bool              shouldCheckNumberOfBondedInteractions = false;
209     int               totalNumberOfBondedInteractions       = -1;
210
211     SimulationSignals signals;
212     // Most global communnication stages don't propagate mdrun
213     // signals, and will use this object to achieve that.
214     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
215
216     if (!mdrunOptions.writeConfout)
217     {
218         // This is on by default, and the main known use case for
219         // turning it off is for convenience in benchmarking, which is
220         // something that should not show up in the general user
221         // interface.
222         GMX_LOG(mdlog.info).asParagraph().
223             appendText("The -noconfout functionality is deprecated, and may be removed in a future version.");
224     }
225
226     /* md-vv uses averaged full step velocities for T-control
227        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
228        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
229     bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
230
231     const bool bRerunMD      = false;
232
233     int        nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
234     bGStatEveryStep = (nstglobalcomm == 1);
235
236     SimulationGroups                  *groups = &top_global->groups;
237
238     std::unique_ptr<EssentialDynamics> ed = nullptr;
239     if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr)
240     {
241         /* Initialize essential dynamics sampling */
242         ed = init_edsam(mdlog,
243                         opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm),
244                         top_global,
245                         ir, cr, constr,
246                         state_global, observablesHistory,
247                         oenv,
248                         startingBehavior);
249     }
250
251     initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
252     Update upd(ir, deform);
253     bool   doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
254     if (startingBehavior != StartingBehavior::RestartWithAppending)
255     {
256         pleaseCiteCouplingAlgorithms(fplog, *ir);
257     }
258     gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir, top_global, oenv, wcycle,
259                                          StartingBehavior::NewSimulation);
260     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, mdoutf_get_fp_dhdl(outf), false);
261
262     gstat = global_stat_init(ir);
263
264     /* Check for polarizable models and flexible constraints */
265     shellfc = init_shell_flexcon(fplog,
266                                  top_global, constr ? constr->numFlexibleConstraints() : 0,
267                                  ir->nstcalcenergy, DOMAINDECOMP(cr));
268
269     {
270         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
271         if ((io > 2000) && MASTER(cr))
272         {
273             fprintf(stderr,
274                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
275                     io);
276         }
277     }
278
279     // Local state only becomes valid now.
280     std::unique_ptr<t_state> stateInstance;
281     t_state *                state;
282
283
284     auto mdatoms = mdAtoms->mdatoms();
285
286     std::unique_ptr<UpdateConstrainCuda> integrator;
287
288     if (DOMAINDECOMP(cr))
289     {
290         dd_init_local_top(*top_global, &top);
291
292         stateInstance = std::make_unique<t_state>();
293         state         = stateInstance.get();
294         dd_init_local_state(cr->dd, state_global, state);
295
296         /* Distribute the charge groups over the nodes from the master node */
297         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
298                             state_global, *top_global, ir, imdSession,
299                             pull_work,
300                             state, &f, mdAtoms, &top, fr,
301                             vsite, constr,
302                             nrnb, nullptr, FALSE);
303         shouldCheckNumberOfBondedInteractions = true;
304         upd.setNumAtoms(state->natoms);
305     }
306     else
307     {
308         state_change_natoms(state_global, state_global->natoms);
309         f.resizeWithPadding(state_global->natoms);
310         /* Copy the pointer to the global state */
311         state = state_global;
312
313         /* Generate and initialize new topology */
314         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr,
315                                   &graph, mdAtoms, constr, vsite, shellfc);
316
317         upd.setNumAtoms(state->natoms);
318     }
319
320     if (c_useGpuUpdateConstrain)
321     {
322         GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
323         GMX_RELEASE_ASSERT(ir->etc != etcNOSEHOOVER, "Nose Hoover temperature coupling is not supported on the GPU.");
324         GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN, "Only Parrinello Rahman pressure control is supported on the GPU.");
325         GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
326         GMX_RELEASE_ASSERT(ed == nullptr, "Essential dynamics is not supported with GPU-based update constraints.");
327         GMX_LOG(mdlog.info).asParagraph().
328             appendText("Using CUDA GPU-based update and constraints module.");
329         integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global);
330         integrator->set(top.idef, *mdatoms, ekind->ngtc);
331         t_pbc pbc;
332         set_pbc(&pbc, epbcXYZ, state->box);
333         integrator->setPbc(&pbc);
334     }
335
336     if (fr->nbv->useGpu())
337     {
338         changePinningPolicy(&state->x, gmx::PinningPolicy::PinnedIfSupported);
339     }
340
341     // NOTE: The global state is no longer used at this point.
342     // But state_global is still used as temporary storage space for writing
343     // the global state to file and potentially for replica exchange.
344     // (Global topology should persist.)
345
346     update_mdatoms(mdatoms, state->lambda[efptMASS]);
347
348     if (ir->bExpanded)
349     {
350         /* Check nstexpanded here, because the grompp check was broken */
351         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
352         {
353             gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
354         }
355         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation,
356                                ir, state->dfhist);
357     }
358
359     if (MASTER(cr))
360     {
361         if (startingBehavior != StartingBehavior::NewSimulation)
362         {
363             /* Restore from energy history if appending to output files */
364             if (startingBehavior == StartingBehavior::RestartWithAppending)
365             {
366                 /* If no history is available (because a checkpoint is from before
367                  * it was written) make a new one later, otherwise restore it.
368                  */
369                 if (observablesHistory->energyHistory)
370                 {
371                     energyOutput.restoreFromEnergyHistory(*observablesHistory->energyHistory);
372                 }
373             }
374             else if (observablesHistory->energyHistory)
375             {
376                 /* We might have read an energy history from checkpoint.
377                  * As we are not appending, we want to restart the statistics.
378                  * Free the allocated memory and reset the counts.
379                  */
380                 observablesHistory->energyHistory = {};
381                 /* We might have read a pull history from checkpoint.
382                  * We will still want to keep the statistics, so that the files
383                  * can be joined and still be meaningful.
384                  * This means that observablesHistory->pullHistory
385                  * should not be reset.
386                  */
387             }
388         }
389         if (!observablesHistory->energyHistory)
390         {
391             observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
392         }
393         if (!observablesHistory->pullHistory)
394         {
395             observablesHistory->pullHistory = std::make_unique<PullHistory>();
396         }
397         /* Set the initial energy history */
398         energyOutput.fillEnergyHistory(observablesHistory->energyHistory.get());
399     }
400
401     preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
402                            startingBehavior != StartingBehavior::NewSimulation);
403
404     // TODO: Remove this by converting AWH into a ForceProvider
405     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingBehavior != StartingBehavior::NewSimulation,
406                                 shellfc != nullptr,
407                                 opt2fn("-awh", nfile, fnm), pull_work);
408
409     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
410     if (useReplicaExchange && MASTER(cr))
411     {
412         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir,
413                                         replExParams);
414     }
415     /* PME tuning is only supported in the Verlet scheme, with PME for
416      * Coulomb. It is not supported with only LJ PME. */
417     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) &&
418                 !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP);
419
420     pme_load_balancing_t *pme_loadbal      = nullptr;
421     if (bPMETune)
422     {
423         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
424                          *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu(),
425                          &bPMETunePrinting);
426     }
427
428     if (!ir->bContinuation)
429     {
430         if (state->flags & (1U << estV))
431         {
432             auto v = makeArrayRef(state->v);
433             /* Set the velocities of vsites, shells and frozen atoms to zero */
434             for (i = 0; i < mdatoms->homenr; i++)
435             {
436                 if (mdatoms->ptype[i] == eptVSite ||
437                     mdatoms->ptype[i] == eptShell)
438                 {
439                     clear_rvec(v[i]);
440                 }
441                 else if (mdatoms->cFREEZE)
442                 {
443                     for (m = 0; m < DIM; m++)
444                     {
445                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
446                         {
447                             v[i][m] = 0;
448                         }
449                     }
450                 }
451             }
452         }
453
454         if (constr)
455         {
456             /* Constrain the initial coordinates and velocities */
457             do_constrain_first(fplog, constr, ir, mdatoms,
458                                state->natoms,
459                                state->x.arrayRefWithPadding(),
460                                state->v.arrayRefWithPadding(),
461                                state->box, state->lambda[efptBONDED]);
462         }
463         if (vsite)
464         {
465             /* Construct the virtual sites for the initial configuration */
466             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr,
467                              top.idef.iparams, top.idef.il,
468                              fr->ePBC, fr->bMolPBC, cr, state->box);
469         }
470     }
471
472     if (ir->efep != efepNO)
473     {
474         /* Set free energy calculation frequency as the greatest common
475          * denominator of nstdhdl and repl_ex_nst. */
476         nstfep = ir->fepvals->nstdhdl;
477         if (ir->bExpanded)
478         {
479             nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
480         }
481         if (useReplicaExchange)
482         {
483             nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
484         }
485     }
486
487     /* Be REALLY careful about what flags you set here. You CANNOT assume
488      * this is the first step, since we might be restarting from a checkpoint,
489      * and in that case we should not do any modifications to the state.
490      */
491     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
492
493     // When restarting from a checkpoint, it can be appropriate to
494     // initialize ekind from quantities in the checkpoint. Otherwise,
495     // compute_globals must initialize ekind before the simulation
496     // starts/restarts. However, only the master rank knows what was
497     // found in the checkpoint file, so we have to communicate in
498     // order to coordinate the restart.
499     //
500     // TODO Consider removing this communication if/when checkpoint
501     // reading directly follows .tpr reading, because all ranks can
502     // agree on hasReadEkinState at that time.
503     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
504     if (PAR(cr))
505     {
506         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
507     }
508     if (hasReadEkinState)
509     {
510         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
511     }
512
513     unsigned int cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
514                                | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
515                                | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
516                                | (hasReadEkinState ? CGLO_READEKIN : 0));
517
518     bSumEkinhOld = FALSE;
519
520     t_vcm vcm(top_global->groups, *ir);
521     reportComRemovalInfo(fplog, vcm);
522
523     /* To minimize communication, compute_globals computes the COM velocity
524      * and the kinetic energy for the velocities without COM motion removed.
525      * Thus to get the kinetic energy without the COM contribution, we need
526      * to call compute_globals twice.
527      */
528     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
529     {
530         unsigned int cglo_flags_iteration = cglo_flags;
531         if (bStopCM && cgloIteration == 0)
532         {
533             cglo_flags_iteration |= CGLO_STOPCM;
534             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
535         }
536         compute_globals(gstat, cr, ir, fr, ekind,
537                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
538                         mdatoms, nrnb, &vcm,
539                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
540                         constr, &nullSignaller, state->box,
541                         &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration
542                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
543         if (cglo_flags_iteration & CGLO_STOPCM)
544         {
545             /* At initialization, do not pass x with acceleration-correction mode
546              * to avoid (incorrect) correction of the initial coordinates.
547              */
548             rvec *xPtr = nullptr;
549             if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
550             {
551                 xPtr = state->x.rvec_array();
552             }
553             process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
554             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
555         }
556     }
557     checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
558                                     top_global, &top, state->x.rvec_array(), state->box,
559                                     &shouldCheckNumberOfBondedInteractions);
560     if (ir->eI == eiVVAK)
561     {
562         /* a second call to get the half step temperature initialized as well */
563         /* we do the same call as above, but turn the pressure off -- internally to
564            compute_globals, this is recognized as a velocity verlet half-step
565            kinetic energy calculation.  This minimized excess variables, but
566            perhaps loses some logic?*/
567
568         compute_globals(gstat, cr, ir, fr, ekind,
569                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
570                         mdatoms, nrnb, &vcm,
571                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
572                         constr, &nullSignaller, state->box,
573                         nullptr, &bSumEkinhOld,
574                         cglo_flags & ~CGLO_PRESSURE);
575     }
576
577     /* Calculate the initial half step temperature, and save the ekinh_old */
578     if (startingBehavior == StartingBehavior::NewSimulation)
579     {
580         for (i = 0; (i < ir->opts.ngtc); i++)
581         {
582             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
583         }
584     }
585
586     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
587        temperature control */
588     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
589
590     if (MASTER(cr))
591     {
592         if (!ir->bContinuation)
593         {
594             if (constr && ir->eConstrAlg == econtLINCS)
595             {
596                 fprintf(fplog,
597                         "RMS relative constraint deviation after constraining: %.2e\n",
598                         constr->rmsd());
599             }
600             if (EI_STATE_VELOCITY(ir->eI))
601             {
602                 real temp = enerd->term[F_TEMP];
603                 if (ir->eI != eiVV)
604                 {
605                     /* Result of Ekin averaged over velocities of -half
606                      * and +half step, while we only have -half step here.
607                      */
608                     temp *= 2;
609                 }
610                 fprintf(fplog, "Initial temperature: %g K\n", temp);
611             }
612         }
613
614         char tbuf[20];
615         fprintf(stderr, "starting mdrun '%s'\n",
616                 *(top_global->name));
617         if (ir->nsteps >= 0)
618         {
619             sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
620         }
621         else
622         {
623             sprintf(tbuf, "%s", "infinite");
624         }
625         if (ir->init_step > 0)
626         {
627             fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
628                     gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
629                     gmx_step_str(ir->init_step, sbuf2),
630                     ir->init_step*ir->delta_t);
631         }
632         else
633         {
634             fprintf(stderr, "%s steps, %s ps.\n",
635                     gmx_step_str(ir->nsteps, sbuf), tbuf);
636         }
637         fprintf(fplog, "\n");
638     }
639
640     walltime_accounting_start_time(walltime_accounting);
641     wallcycle_start(wcycle, ewcRUN);
642     print_start(fplog, cr, walltime_accounting, "mdrun");
643
644 #if GMX_FAHCORE
645     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
646     int chkpt_ret = fcCheckPointParallel( cr->nodeid,
647                                           NULL, 0);
648     if (chkpt_ret == 0)
649     {
650         gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
651     }
652 #endif
653
654     /***********************************************************
655      *
656      *             Loop over MD steps
657      *
658      ************************************************************/
659
660     bFirstStep       = TRUE;
661     /* Skip the first Nose-Hoover integration when we get the state from tpx */
662     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
663     bSumEkinhOld     = FALSE;
664     bExchanged       = FALSE;
665     bNeedRepartition = FALSE;
666
667     bool simulationsShareState = false;
668     int  nstSignalComm         = nstglobalcomm;
669     {
670         // TODO This implementation of ensemble orientation restraints is nasty because
671         // a user can't just do multi-sim with single-sim orientation restraints.
672         bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
673         bool awhUsesMultiSim         = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
674
675         // Replica exchange, ensemble restraints and AWH need all
676         // simulations to remain synchronized, so they need
677         // checkpoints and stop conditions to act on the same step, so
678         // the propagation of such signals must take place between
679         // simulations, not just within simulations.
680         // TODO: Make algorithm initializers set these flags.
681         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
682
683         if (simulationsShareState)
684         {
685             // Inter-simulation signal communication does not need to happen
686             // often, so we use a minimum of 200 steps to reduce overhead.
687             const int c_minimumInterSimulationSignallingInterval = 200;
688             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm;
689         }
690     }
691
692     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
693                 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
694                 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
695                 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
696
697     auto checkpointHandler = std::make_unique<CheckpointHandler>(
698                 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
699                 simulationsShareState, ir->nstlist == 0, MASTER(cr),
700                 mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period);
701
702     const bool resetCountersIsLocal = true;
703     auto       resetHandler         = std::make_unique<ResetHandler>(
704                 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal,
705                 ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
706                 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
707
708     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
709
710     step     = ir->init_step;
711     step_rel = 0;
712
713     // TODO extract this to new multi-simulation module
714     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
715     {
716         if (!multisim_int_all_are_equal(ms, ir->nsteps))
717         {
718             GMX_LOG(mdlog.warning).appendText(
719                     "Note: The number of steps is not consistent across multi simulations,\n"
720                     "but we are proceeding anyway!");
721         }
722         if (!multisim_int_all_are_equal(ms, ir->init_step))
723         {
724             GMX_LOG(mdlog.warning).appendText(
725                     "Note: The initial step is not consistent across multi simulations,\n"
726                     "but we are proceeding anyway!");
727         }
728     }
729
730     /* and stop now if we should */
731     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
732     while (!bLastStep)
733     {
734
735         /* Determine if this is a neighbor search step */
736         bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
737
738         if (bPMETune && bNStList)
739         {
740             /* PME grid + cut-off optimization with GPUs or PME nodes */
741             pme_loadbal_do(pme_loadbal, cr,
742                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
743                            fplog, mdlog,
744                            *ir, fr, *state,
745                            wcycle,
746                            step, step_rel,
747                            &bPMETunePrinting);
748         }
749
750         wallcycle_start(wcycle, ewcSTEP);
751
752         bLastStep = (step_rel == ir->nsteps);
753         t         = t0 + step*ir->delta_t;
754
755         // TODO Refactor this, so that nstfep does not need a default value of zero
756         if (ir->efep != efepNO || ir->bSimTemp)
757         {
758             /* find and set the current lambdas */
759             setCurrentLambdasLocal(step, ir->fepvals, lam0, state);
760
761             bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
762             bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
763             bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
764                             && (ir->bExpanded) && (step > 0) &&
765                             (startingBehavior == StartingBehavior::NewSimulation));
766         }
767
768         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep &&
769                      do_per_step(step, replExParams.exchangeInterval));
770
771         if (doSimulatedAnnealing)
772         {
773             update_annealing_target_temp(ir, t, &upd);
774         }
775
776         /* Stop Center of Mass motion */
777         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
778
779         /* Determine whether or not to do Neighbour Searching */
780         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
781
782         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
783
784         /* do_log triggers energy and virial calculation. Because this leads
785          * to different code paths, forces can be different. Thus for exact
786          * continuation we should avoid extra log output.
787          * Note that the || bLastStep can result in non-exact continuation
788          * beyond the last step. But we don't consider that to be an issue.
789          */
790         do_log     =
791             (do_per_step(step, ir->nstlog) ||
792              (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) ||
793              bLastStep);
794         do_verbose = mdrunOptions.verbose &&
795             (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
796
797         if (bNS && !(bFirstStep && ir->bContinuation))
798         {
799             bMasterState = FALSE;
800             /* Correct the new box if it is too skewed */
801             if (inputrecDynamicBox(ir))
802             {
803                 if (correct_box(fplog, step, state->box, graph))
804                 {
805                     bMasterState = TRUE;
806                 }
807             }
808             if (DOMAINDECOMP(cr) && bMasterState)
809             {
810                 dd_collect_state(cr->dd, state, state_global);
811             }
812
813             if (DOMAINDECOMP(cr))
814             {
815                 /* Repartition the domain decomposition */
816                 dd_partition_system(fplog, mdlog, step, cr,
817                                     bMasterState, nstglobalcomm,
818                                     state_global, *top_global, ir, imdSession,
819                                     pull_work,
820                                     state, &f, mdAtoms, &top, fr,
821                                     vsite, constr,
822                                     nrnb, wcycle,
823                                     do_verbose && !bPMETunePrinting);
824                 shouldCheckNumberOfBondedInteractions = true;
825                 upd.setNumAtoms(state->natoms);
826             }
827         }
828
829         if (MASTER(cr) && do_log)
830         {
831             energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
832         }
833
834         if (ir->efep != efepNO)
835         {
836             update_mdatoms(mdatoms, state->lambda[efptMASS]);
837         }
838
839         if (bExchanged)
840         {
841
842             /* We need the kinetic energy at minus the half step for determining
843              * the full step kinetic energy and possibly for T-coupling.*/
844             /* This may not be quite working correctly yet . . . . */
845             compute_globals(gstat, cr, ir, fr, ekind,
846                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
847                             mdatoms, nrnb, &vcm,
848                             wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
849                             constr, &nullSignaller, state->box,
850                             &totalNumberOfBondedInteractions, &bSumEkinhOld,
851                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
852             checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
853                                             top_global, &top, state->x.rvec_array(), state->box,
854                                             &shouldCheckNumberOfBondedInteractions);
855         }
856         clear_mat(force_vir);
857
858         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
859
860         /* Determine the energy and pressure:
861          * at nstcalcenergy steps and at energy output steps (set below).
862          */
863         if (EI_VV(ir->eI) && (!bInitStep))
864         {
865             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
866             bCalcVir      = bCalcEnerStep ||
867                 (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
868         }
869         else
870         {
871             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
872             bCalcVir      = bCalcEnerStep ||
873                 (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
874         }
875         bCalcEner = bCalcEnerStep;
876
877         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
878
879         if (do_ene || do_log || bDoReplEx)
880         {
881             bCalcVir  = TRUE;
882             bCalcEner = TRUE;
883         }
884
885         /* Do we need global communication ? */
886         bGStat = (bCalcVir || bCalcEner || bStopCM ||
887                   do_per_step(step, nstglobalcomm) ||
888                   (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
889
890         force_flags = (GMX_FORCE_STATECHANGED |
891                        ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) |
892                        GMX_FORCE_ALLFORCES |
893                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
894                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
895                        (bDoFEP ? GMX_FORCE_DHDL : 0)
896                        );
897
898         if (shellfc)
899         {
900             /* Now is the time to relax the shells */
901             relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose,
902                                 enforcedRotation, step,
903                                 ir, imdSession, pull_work, bNS, force_flags, &top,
904                                 constr, enerd, fcd,
905                                 state->natoms,
906                                 state->x.arrayRefWithPadding(),
907                                 state->v.arrayRefWithPadding(),
908                                 state->box,
909                                 state->lambda,
910                                 &state->hist,
911                                 f.arrayRefWithPadding(), force_vir, mdatoms,
912                                 nrnb, wcycle, graph,
913                                 shellfc, fr, mdScheduleWork, t, mu_tot,
914                                 vsite,
915                                 ddBalanceRegionHandler);
916         }
917         else
918         {
919             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated
920                (or the AWH update will be performed twice for one step when continuing). It would be best to
921                call this update function from do_md_trajectory_writing but that would occur after do_force.
922                One would have to divide the update_awh function into one function applying the AWH force
923                and one doing the AWH bias update. The update AWH bias function could then be called after
924                do_md_trajectory_writing (then containing update_awh_history).
925                The checkpointing will in the future probably moved to the start of the md loop which will
926                rid of this issue. */
927             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
928             {
929                 awh->updateHistory(state_global->awhHistory.get());
930             }
931
932             /* The coordinates (x) are shifted (to get whole molecules)
933              * in do_force.
934              * This is parallellized as well, and does communication too.
935              * Check comments in sim_util.c
936              */
937             do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession,
938                      pull_work,
939                      step, nrnb, wcycle, &top,
940                      state->box, state->x.arrayRefWithPadding(), &state->hist,
941                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
942                      state->lambda, graph,
943                      fr, mdScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
944                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
945                      ddBalanceRegionHandler);
946         }
947
948         // VV integrators do not need the following velocity half step
949         // if it is the first step after starting from a checkpoint.
950         // That is, the half step is needed on all other steps, and
951         // also the first step when starting from a .tpr file.
952         if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
953         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
954         {
955             rvec *vbuf = nullptr;
956
957             wallcycle_start(wcycle, ewcUPDATE);
958             if (ir->eI == eiVV && bInitStep)
959             {
960                 /* if using velocity verlet with full time step Ekin,
961                  * take the first half step only to compute the
962                  * virial for the first step. From there,
963                  * revert back to the initial coordinates
964                  * so that the input is actually the initial step.
965                  */
966                 snew(vbuf, state->natoms);
967                 copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */
968             }
969             else
970             {
971                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
972                 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
973             }
974
975             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
976                           ekind, M, &upd, etrtVELOCITY1,
977                           cr, constr);
978
979             wallcycle_stop(wcycle, ewcUPDATE);
980             constrain_velocities(step, nullptr,
981                                  state,
982                                  shake_vir,
983                                  constr,
984                                  bCalcVir, do_log, do_ene);
985             wallcycle_start(wcycle, ewcUPDATE);
986             /* if VV, compute the pressure and constraints */
987             /* For VV2, we strictly only need this if using pressure
988              * control, but we really would like to have accurate pressures
989              * printed out.
990              * Think about ways around this in the future?
991              * For now, keep this choice in comments.
992              */
993             /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
994             /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
995             bPres = TRUE;
996             bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
997             if (bCalcEner && ir->eI == eiVVAK)
998             {
999                 bSumEkinhOld = TRUE;
1000             }
1001             /* for vv, the first half of the integration actually corresponds to the previous step.
1002                So we need information from the last step in the first half of the integration */
1003             if (bGStat || do_per_step(step-1, nstglobalcomm))
1004             {
1005                 wallcycle_stop(wcycle, ewcUPDATE);
1006                 compute_globals(gstat, cr, ir, fr, ekind,
1007                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1008                                 mdatoms, nrnb, &vcm,
1009                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1010                                 constr, &nullSignaller, state->box,
1011                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1012                                 (bGStat ? CGLO_GSTAT : 0)
1013                                 | (bCalcEner ? CGLO_ENERGY : 0)
1014                                 | (bTemp ? CGLO_TEMPERATURE : 0)
1015                                 | (bPres ? CGLO_PRESSURE : 0)
1016                                 | (bPres ? CGLO_CONSTRAINT : 0)
1017                                 | (bStopCM ? CGLO_STOPCM : 0)
1018                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1019                                 | CGLO_SCALEEKIN
1020                                 );
1021                 /* explanation of above:
1022                    a) We compute Ekin at the full time step
1023                    if 1) we are using the AveVel Ekin, and it's not the
1024                    initial step, or 2) if we are using AveEkin, but need the full
1025                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1026                    b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
1027                    EkinAveVel because it's needed for the pressure */
1028                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1029                                                 top_global, &top, state->x.rvec_array(), state->box,
1030                                                 &shouldCheckNumberOfBondedInteractions);
1031                 if (bStopCM)
1032                 {
1033                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1034                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1035                 }
1036                 wallcycle_start(wcycle, ewcUPDATE);
1037             }
1038             /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1039             if (!bInitStep)
1040             {
1041                 if (bTrotter)
1042                 {
1043                     m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
1044                     trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
1045
1046                     /* TODO This is only needed when we're about to write
1047                      * a checkpoint, because we use it after the restart
1048                      * (in a kludge?). But what should we be doing if
1049                      * the startingBehavior is NewSimulation or bInitStep are true? */
1050                     if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1051                     {
1052                         copy_mat(shake_vir, state->svir_prev);
1053                         copy_mat(force_vir, state->fvir_prev);
1054                     }
1055                     if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1056                     {
1057                         /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1058                         enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1059                         enerd->term[F_EKIN] = trace(ekind->ekin);
1060                     }
1061                 }
1062                 else if (bExchanged)
1063                 {
1064                     wallcycle_stop(wcycle, ewcUPDATE);
1065                     /* We need the kinetic energy at minus the half step for determining
1066                      * the full step kinetic energy and possibly for T-coupling.*/
1067                     /* This may not be quite working correctly yet . . . . */
1068                     compute_globals(gstat, cr, ir, fr, ekind,
1069                                     state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1070                                     mdatoms, nrnb, &vcm,
1071                                     wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
1072                                     constr, &nullSignaller, state->box,
1073                                     nullptr, &bSumEkinhOld,
1074                                     CGLO_GSTAT | CGLO_TEMPERATURE);
1075                     wallcycle_start(wcycle, ewcUPDATE);
1076                 }
1077             }
1078             /* if it's the initial step, we performed this first step just to get the constraint virial */
1079             if (ir->eI == eiVV && bInitStep)
1080             {
1081                 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1082                 sfree(vbuf);
1083             }
1084             wallcycle_stop(wcycle, ewcUPDATE);
1085         }
1086
1087         /* compute the conserved quantity */
1088         if (EI_VV(ir->eI))
1089         {
1090             saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1091             if (ir->eI == eiVV)
1092             {
1093                 last_ekin = enerd->term[F_EKIN];
1094             }
1095             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1096             {
1097                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1098             }
1099             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1100             if (ir->efep != efepNO)
1101             {
1102                 sum_dhdl(enerd, state->lambda, ir->fepvals);
1103             }
1104         }
1105
1106         /* ########  END FIRST UPDATE STEP  ############## */
1107         /* ########  If doing VV, we now have v(dt) ###### */
1108         if (bDoExpanded)
1109         {
1110             /* perform extended ensemble sampling in lambda - we don't
1111                actually move to the new state before outputting
1112                statistics, but if performing simulated tempering, we
1113                do update the velocities and the tau_t. */
1114
1115             lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms);
1116             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1117             if (MASTER(cr))
1118             {
1119                 copy_df_history(state_global->dfhist, state->dfhist);
1120             }
1121         }
1122
1123         /* Now we have the energies and forces corresponding to the
1124          * coordinates at time t. We must output all of this before
1125          * the update.
1126          */
1127         do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
1128                                  ir, state, state_global, observablesHistory,
1129                                  top_global, fr,
1130                                  outf, energyOutput, ekind, f,
1131                                  checkpointHandler->isCheckpointingStep(),
1132                                  bRerunMD, bLastStep,
1133                                  mdrunOptions.writeConfout,
1134                                  bSumEkinhOld);
1135         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1136         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1137
1138         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1139         if (startingBehavior != StartingBehavior::NewSimulation &&
1140             (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1141         {
1142             copy_mat(state->svir_prev, shake_vir);
1143             copy_mat(state->fvir_prev, force_vir);
1144         }
1145
1146         stopHandler->setSignal();
1147         resetHandler->setSignal(walltime_accounting);
1148
1149         if (bGStat || !PAR(cr))
1150         {
1151             /* In parallel we only have to check for checkpointing in steps
1152              * where we do global communication,
1153              *  otherwise the other nodes don't know.
1154              */
1155             checkpointHandler->setSignal(walltime_accounting);
1156         }
1157
1158         /* #########   START SECOND UPDATE STEP ################# */
1159
1160         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
1161            in preprocessing */
1162
1163         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1164         {
1165             gmx_bool bIfRandomize;
1166             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1167             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1168             if (constr && bIfRandomize)
1169             {
1170                 constrain_velocities(step, nullptr,
1171                                      state,
1172                                      tmp_vir,
1173                                      constr,
1174                                      bCalcVir, do_log, do_ene);
1175             }
1176         }
1177         /* Box is changed in update() when we do pressure coupling,
1178          * but we should still use the old box for energy corrections and when
1179          * writing it to the energy file, so it matches the trajectory files for
1180          * the same timestep above. Make a copy in a separate array.
1181          */
1182         copy_mat(state->box, lastbox);
1183
1184         dvdl_constr = 0;
1185
1186         wallcycle_start(wcycle, ewcUPDATE);
1187         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1188         if (bTrotter)
1189         {
1190             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1191             /* We can only do Berendsen coupling after we have summed
1192              * the kinetic energy or virial. Since the happens
1193              * in global_state after update, we should only do it at
1194              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1195              */
1196         }
1197         else
1198         {
1199             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1200             update_pcouple_before_coordinates(fplog, step, ir, state,
1201                                               parrinellorahmanMu, M,
1202                                               bInitStep);
1203         }
1204
1205         if (EI_VV(ir->eI))
1206         {
1207             /* velocity half-step update */
1208             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1209                           ekind, M, &upd, etrtVELOCITY2,
1210                           cr, constr);
1211         }
1212
1213         /* Above, initialize just copies ekinh into ekin,
1214          * it doesn't copy position (for VV),
1215          * and entire integrator for MD.
1216          */
1217
1218         if (ir->eI == eiVVAK)
1219         {
1220             cbuf.resize(state->x.size());
1221             std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1222         }
1223
1224         if (c_useGpuUpdateConstrain)
1225         {
1226             if (bNS)
1227             {
1228                 integrator->set(top.idef, *mdatoms, ekind->ngtc);
1229                 t_pbc pbc;
1230                 set_pbc(&pbc, epbcXYZ, state->box);
1231                 integrator->setPbc(&pbc);
1232             }
1233             integrator->copyCoordinatesToGpu(state->x.rvec_array());
1234             integrator->copyVelocitiesToGpu(state->v.rvec_array());
1235             integrator->copyForcesToGpu(as_rvec_array(f.data()));
1236
1237             bool doTempCouple     = (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1238             bool doPressureCouple = (ir->epc == epcPARRINELLORAHMAN && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1239
1240             // This applies Leap-Frog, LINCS and SETTLE in succession
1241             integrator->integrate(ir->delta_t, true, bCalcVir, shake_vir,
1242                                   doTempCouple, ekind->tcstat,
1243                                   doPressureCouple, ir->nstpcouple*ir->delta_t, M);
1244
1245             integrator->copyCoordinatesFromGpu(state->x.rvec_array());
1246             integrator->copyVelocitiesFromGpu(state->v.rvec_array());
1247         }
1248         else
1249         {
1250             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1251                           ekind, M, &upd, etrtPOSITION, cr, constr);
1252
1253             wallcycle_stop(wcycle, ewcUPDATE);
1254
1255             constrain_coordinates(step, &dvdl_constr, state,
1256                                   shake_vir,
1257                                   &upd, constr,
1258                                   bCalcVir, do_log, do_ene);
1259
1260             update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
1261                                   cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
1262             finish_update(ir, mdatoms,
1263                           state, graph,
1264                           nrnb, wcycle, &upd, constr);
1265         }
1266
1267         if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1268         {
1269             updatePrevStepPullCom(pull_work, state);
1270         }
1271
1272         if (ir->eI == eiVVAK)
1273         {
1274             /* erase F_EKIN and F_TEMP here? */
1275             /* just compute the kinetic energy at the half step to perform a trotter step */
1276             compute_globals(gstat, cr, ir, fr, ekind,
1277                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1278                             mdatoms, nrnb, &vcm,
1279                             wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1280                             constr, &nullSignaller, lastbox,
1281                             nullptr, &bSumEkinhOld,
1282                             (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
1283                             );
1284             wallcycle_start(wcycle, ewcUPDATE);
1285             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1286             /* now we know the scaling, we can compute the positions again */
1287             std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1288
1289             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1290                           ekind, M, &upd, etrtPOSITION, cr, constr);
1291             wallcycle_stop(wcycle, ewcUPDATE);
1292
1293             /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1294             /* are the small terms in the shake_vir here due
1295              * to numerical errors, or are they important
1296              * physically? I'm thinking they are just errors, but not completely sure.
1297              * For now, will call without actually constraining, constr=NULL*/
1298             finish_update(ir, mdatoms,
1299                           state, graph,
1300                           nrnb, wcycle, &upd, nullptr);
1301         }
1302         if (EI_VV(ir->eI))
1303         {
1304             /* this factor or 2 correction is necessary
1305                because half of the constraint force is removed
1306                in the vv step, so we have to double it.  See
1307                the Redmine issue #1255.  It is not yet clear
1308                if the factor of 2 is exact, or just a very
1309                good approximation, and this will be
1310                investigated.  The next step is to see if this
1311                can be done adding a dhdl contribution from the
1312                rattle step, but this is somewhat more
1313                complicated with the current code. Will be
1314                investigated, hopefully for 4.6.3. However,
1315                this current solution is much better than
1316                having it completely wrong.
1317              */
1318             enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
1319         }
1320         else
1321         {
1322             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1323         }
1324
1325         if (vsite != nullptr)
1326         {
1327             wallcycle_start(wcycle, ewcVSITECONSTR);
1328             if (graph != nullptr)
1329             {
1330                 shift_self(graph, state->box, state->x.rvec_array());
1331             }
1332             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1333                              top.idef.iparams, top.idef.il,
1334                              fr->ePBC, fr->bMolPBC, cr, state->box);
1335
1336             if (graph != nullptr)
1337             {
1338                 unshift_self(graph, state->box, state->x.rvec_array());
1339             }
1340             wallcycle_stop(wcycle, ewcVSITECONSTR);
1341         }
1342
1343         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1344         /* With Leap-Frog we can skip compute_globals at
1345          * non-communication steps, but we need to calculate
1346          * the kinetic energy one step before communication.
1347          */
1348         {
1349             // Organize to do inter-simulation signalling on steps if
1350             // and when algorithms require it.
1351             bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1352
1353             if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
1354             {
1355                 // Since we're already communicating at this step, we
1356                 // can propagate intra-simulation signals. Note that
1357                 // check_nstglobalcomm has the responsibility for
1358                 // choosing the value of nstglobalcomm that is one way
1359                 // bGStat becomes true, so we can't get into a
1360                 // situation where e.g. checkpointing can't be
1361                 // signalled.
1362                 bool                doIntraSimSignal = true;
1363                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1364
1365                 compute_globals(gstat, cr, ir, fr, ekind,
1366                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1367                                 mdatoms, nrnb, &vcm,
1368                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1369                                 constr, &signaller,
1370                                 lastbox,
1371                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1372                                 (bGStat ? CGLO_GSTAT : 0)
1373                                 | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1374                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1375                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1376                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
1377                                 | CGLO_CONSTRAINT
1378                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1379                                 );
1380                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1381                                                 top_global, &top, state->x.rvec_array(), state->box,
1382                                                 &shouldCheckNumberOfBondedInteractions);
1383                 if (!EI_VV(ir->eI) && bStopCM)
1384                 {
1385                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1386                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1387                 }
1388             }
1389         }
1390
1391         /* #############  END CALC EKIN AND PRESSURE ################# */
1392
1393         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1394            the virial that should probably be addressed eventually. state->veta has better properies,
1395            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1396            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1397
1398         if (ir->efep != efepNO && !EI_VV(ir->eI))
1399         {
1400             /* Sum up the foreign energy and dhdl terms for md and sd.
1401                Currently done every step so that dhdl is correct in the .edr */
1402             sum_dhdl(enerd, state->lambda, ir->fepvals);
1403         }
1404
1405         update_pcouple_after_coordinates(fplog, step, ir, mdatoms,
1406                                          pres, force_vir, shake_vir,
1407                                          parrinellorahmanMu,
1408                                          state, nrnb, &upd);
1409
1410         /* ################# END UPDATE STEP 2 ################# */
1411         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1412
1413         /* The coordinates (x) were unshifted in update */
1414         if (!bGStat)
1415         {
1416             /* We will not sum ekinh_old,
1417              * so signal that we still have to do it.
1418              */
1419             bSumEkinhOld = TRUE;
1420         }
1421
1422         if (bCalcEner)
1423         {
1424             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1425
1426             /* use the directly determined last velocity, not actually the averaged half steps */
1427             if (bTrotter && ir->eI == eiVV)
1428             {
1429                 enerd->term[F_EKIN] = last_ekin;
1430             }
1431             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1432
1433             if (integratorHasConservedEnergyQuantity(ir))
1434             {
1435                 if (EI_VV(ir->eI))
1436                 {
1437                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1438                 }
1439                 else
1440                 {
1441                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1442                 }
1443             }
1444             /* #########  END PREPARING EDR OUTPUT  ###########  */
1445         }
1446
1447         /* Output stuff */
1448         if (MASTER(cr))
1449         {
1450             if (fplog && do_log && bDoExpanded)
1451             {
1452                 /* only needed if doing expanded ensemble */
1453                 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr,
1454                                           state_global->dfhist, state->fep_state, ir->nstlog, step);
1455             }
1456             if (bCalcEner)
1457             {
1458                 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep,
1459                                                  t, mdatoms->tmass, enerd, state,
1460                                                  ir->fepvals, ir->expandedvals, lastbox,
1461                                                  shake_vir, force_vir, total_vir, pres,
1462                                                  ekind, mu_tot, constr);
1463             }
1464             else
1465             {
1466                 energyOutput.recordNonEnergyStep();
1467             }
1468
1469             gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
1470             gmx_bool do_or  = do_per_step(step, ir->nstorireout);
1471
1472             energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1473             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1474                                                do_log ? fplog : nullptr,
1475                                                step, t,
1476                                                fcd, awh.get());
1477
1478             if (ir->bPull)
1479             {
1480                 pull_print_output(pull_work, step, t);
1481             }
1482
1483             if (do_per_step(step, ir->nstlog))
1484             {
1485                 if (fflush(fplog) != 0)
1486                 {
1487                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1488                 }
1489             }
1490         }
1491         if (bDoExpanded)
1492         {
1493             /* Have to do this part _after_ outputting the logfile and the edr file */
1494             /* Gets written into the state at the beginning of next loop*/
1495             state->fep_state = lamnew;
1496         }
1497         /* Print the remaining wall clock time for the run */
1498         if (isMasterSimMasterRank(ms, MASTER(cr)) &&
1499             (do_verbose || gmx_got_usr_signal()) &&
1500             !bPMETunePrinting)
1501         {
1502             if (shellfc)
1503             {
1504                 fprintf(stderr, "\n");
1505             }
1506             print_time(stderr, walltime_accounting, step, ir, cr);
1507         }
1508
1509         /* Ion/water position swapping.
1510          * Not done in last step since trajectory writing happens before this call
1511          * in the MD loop and exchanges would be lost anyway. */
1512         bNeedRepartition = FALSE;
1513         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
1514             do_per_step(step, ir->swap->nstswap))
1515         {
1516             bNeedRepartition = do_swapcoords(cr, step, t, ir, swap, wcycle,
1517                                              as_rvec_array(state->x.data()),
1518                                              state->box,
1519                                              MASTER(cr) && mdrunOptions.verbose,
1520                                              bRerunMD);
1521
1522             if (bNeedRepartition && DOMAINDECOMP(cr))
1523             {
1524                 dd_collect_state(cr->dd, state, state_global);
1525             }
1526         }
1527
1528         /* Replica exchange */
1529         bExchanged = FALSE;
1530         if (bDoReplEx)
1531         {
1532             bExchanged = replica_exchange(fplog, cr, ms, repl_ex,
1533                                           state_global, enerd,
1534                                           state, step, t);
1535         }
1536
1537         if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
1538         {
1539             dd_partition_system(fplog, mdlog, step, cr, TRUE, 1,
1540                                 state_global, *top_global, ir, imdSession,
1541                                 pull_work,
1542                                 state, &f, mdAtoms, &top, fr,
1543                                 vsite, constr,
1544                                 nrnb, wcycle, FALSE);
1545             shouldCheckNumberOfBondedInteractions = true;
1546             upd.setNumAtoms(state->natoms);
1547         }
1548
1549         bFirstStep             = FALSE;
1550         bInitStep              = FALSE;
1551
1552         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1553         /* With all integrators, except VV, we need to retain the pressure
1554          * at the current step for coupling at the next step.
1555          */
1556         if ((state->flags & (1U<<estPRES_PREV)) &&
1557             (bGStatEveryStep ||
1558              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1559         {
1560             /* Store the pressure in t_state for pressure coupling
1561              * at the next MD step.
1562              */
1563             copy_mat(pres, state->pres_prev);
1564         }
1565
1566         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1567
1568         if ( (membed != nullptr) && (!bLastStep) )
1569         {
1570             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1571         }
1572
1573         cycles = wallcycle_stop(wcycle, ewcSTEP);
1574         if (DOMAINDECOMP(cr) && wcycle)
1575         {
1576             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1577         }
1578
1579         /* increase the MD step number */
1580         step++;
1581         step_rel++;
1582
1583         resetHandler->resetCounters(
1584                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(),
1585                 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1586
1587         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1588         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1589
1590     }
1591     /* End of main MD loop */
1592
1593     /* Closing TNG files can include compressing data. Therefore it is good to do that
1594      * before stopping the time measurements. */
1595     mdoutf_tng_close(outf);
1596
1597     /* Stop measuring walltime */
1598     walltime_accounting_end_time(walltime_accounting);
1599
1600     if (!thisRankHasDuty(cr, DUTY_PME))
1601     {
1602         /* Tell the PME only node to finish */
1603         gmx_pme_send_finish(cr);
1604     }
1605
1606     if (MASTER(cr))
1607     {
1608         if (ir->nstcalcenergy > 0)
1609         {
1610             energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1611             energyOutput.printAverages(fplog, groups);
1612         }
1613     }
1614     done_mdoutf(outf);
1615
1616     if (bPMETune)
1617     {
1618         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1619     }
1620
1621     done_shellfc(fplog, shellfc, step_rel);
1622
1623     if (useReplicaExchange && MASTER(cr))
1624     {
1625         print_replica_exchange_statistics(fplog, repl_ex);
1626     }
1627
1628     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1629
1630     global_stat_destroy(gstat);
1631
1632 }