30580ccfb15b84b9a6217737c879a2b9c84dad48
[alexxy/gromacs.git] / src / gromacs / mdrun / md.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 /*! \internal \file
38  *
39  * \brief Implements the integrator for normal molecular dynamics simulations
40  *
41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42  * \ingroup module_mdrun
43  */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53
54 #include "gromacs/awh/awh.h"
55 #include "gromacs/commandline/filenm.h"
56 #include "gromacs/domdec/collect.h"
57 #include "gromacs/domdec/dlbtiming.h"
58 #include "gromacs/domdec/domdec.h"
59 #include "gromacs/domdec/domdec_network.h"
60 #include "gromacs/domdec/domdec_struct.h"
61 #include "gromacs/domdec/mdsetup.h"
62 #include "gromacs/domdec/partition.h"
63 #include "gromacs/essentialdynamics/edsam.h"
64 #include "gromacs/ewald/pme.h"
65 #include "gromacs/ewald/pme_load_balancing.h"
66 #include "gromacs/fileio/trxio.h"
67 #include "gromacs/gmxlib/network.h"
68 #include "gromacs/gmxlib/nrnb.h"
69 #include "gromacs/gpu_utils/gpu_utils.h"
70 #include "gromacs/imd/imd.h"
71 #include "gromacs/listed_forces/manage_threading.h"
72 #include "gromacs/math/functions.h"
73 #include "gromacs/math/utilities.h"
74 #include "gromacs/math/vec.h"
75 #include "gromacs/math/vectypes.h"
76 #include "gromacs/mdlib/checkpointhandler.h"
77 #include "gromacs/mdlib/compute_io.h"
78 #include "gromacs/mdlib/constr.h"
79 #include "gromacs/mdlib/ebin.h"
80 #include "gromacs/mdlib/enerdata_utils.h"
81 #include "gromacs/mdlib/energyoutput.h"
82 #include "gromacs/mdlib/expanded.h"
83 #include "gromacs/mdlib/force.h"
84 #include "gromacs/mdlib/force_flags.h"
85 #include "gromacs/mdlib/forcerec.h"
86 #include "gromacs/mdlib/md_support.h"
87 #include "gromacs/mdlib/mdatoms.h"
88 #include "gromacs/mdlib/mdoutf.h"
89 #include "gromacs/mdlib/membed.h"
90 #include "gromacs/mdlib/resethandler.h"
91 #include "gromacs/mdlib/sighandler.h"
92 #include "gromacs/mdlib/simulationsignal.h"
93 #include "gromacs/mdlib/stat.h"
94 #include "gromacs/mdlib/stophandler.h"
95 #include "gromacs/mdlib/tgroup.h"
96 #include "gromacs/mdlib/trajectory_writing.h"
97 #include "gromacs/mdlib/update.h"
98 #include "gromacs/mdlib/update_constrain_cuda.h"
99 #include "gromacs/mdlib/vcm.h"
100 #include "gromacs/mdlib/vsite.h"
101 #include "gromacs/mdrunutility/handlerestart.h"
102 #include "gromacs/mdrunutility/multisim.h"
103 #include "gromacs/mdrunutility/printtime.h"
104 #include "gromacs/mdtypes/awh_history.h"
105 #include "gromacs/mdtypes/awh_params.h"
106 #include "gromacs/mdtypes/commrec.h"
107 #include "gromacs/mdtypes/df_history.h"
108 #include "gromacs/mdtypes/energyhistory.h"
109 #include "gromacs/mdtypes/fcdata.h"
110 #include "gromacs/mdtypes/forcerec.h"
111 #include "gromacs/mdtypes/group.h"
112 #include "gromacs/mdtypes/inputrec.h"
113 #include "gromacs/mdtypes/interaction_const.h"
114 #include "gromacs/mdtypes/md_enums.h"
115 #include "gromacs/mdtypes/mdatom.h"
116 #include "gromacs/mdtypes/mdrunoptions.h"
117 #include "gromacs/mdtypes/observableshistory.h"
118 #include "gromacs/mdtypes/pullhistory.h"
119 #include "gromacs/mdtypes/state.h"
120 #include "gromacs/modularsimulator/energyelement.h"
121 #include "gromacs/nbnxm/nbnxm.h"
122 #include "gromacs/pbcutil/mshift.h"
123 #include "gromacs/pbcutil/pbc.h"
124 #include "gromacs/pulling/output.h"
125 #include "gromacs/pulling/pull.h"
126 #include "gromacs/swap/swapcoords.h"
127 #include "gromacs/timing/wallcycle.h"
128 #include "gromacs/timing/walltime_accounting.h"
129 #include "gromacs/topology/atoms.h"
130 #include "gromacs/topology/idef.h"
131 #include "gromacs/topology/mtop_util.h"
132 #include "gromacs/topology/topology.h"
133 #include "gromacs/trajectory/trajectoryframe.h"
134 #include "gromacs/utility/basedefinitions.h"
135 #include "gromacs/utility/cstringutil.h"
136 #include "gromacs/utility/fatalerror.h"
137 #include "gromacs/utility/logger.h"
138 #include "gromacs/utility/real.h"
139 #include "gromacs/utility/smalloc.h"
140
141 #include "legacysimulator.h"
142 #include "replicaexchange.h"
143 #include "shellfc.h"
144
145 #if GMX_FAHCORE
146 #include "corewrap.h"
147 #endif
148
149 using gmx::SimulationSignaller;
150
151 //! Whether the GPU versions of Leap-Frog integrator and LINCS and SETTLE constraints
152 static const bool c_useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
153
154 void gmx::LegacySimulator::do_md()
155 {
156     // TODO Historically, the EM and MD "integrators" used different
157     // names for the t_inputrec *parameter, but these must have the
158     // same name, now that it's a member of a struct. We use this ir
159     // alias to avoid a large ripple of nearly useless changes.
160     // t_inputrec is being replaced by IMdpOptionsProvider, so this
161     // will go away eventually.
162     t_inputrec                 *ir   = inputrec;
163     int64_t                     step, step_rel;
164     double                      t, t0 = ir->init_t, lam0[efptNR];
165     gmx_bool                    bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
166     gmx_bool                    bNS, bNStList, bStopCM,
167                                 bFirstStep, bInitStep, bLastStep = FALSE;
168     gmx_bool                    bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
169     gmx_bool                    do_ene, do_log, do_verbose;
170     gmx_bool                    bMasterState;
171     unsigned int                force_flags;
172     tensor                      force_vir     = {{0}}, shake_vir = {{0}}, total_vir = {{0}},
173                                 tmp_vir       = {{0}}, pres = {{0}};
174     int                         i, m;
175     rvec                        mu_tot;
176     matrix                      parrinellorahmanMu, M;
177     gmx_repl_ex_t               repl_ex = nullptr;
178     gmx_localtop_t              top;
179     PaddedHostVector<gmx::RVec> f {};
180     gmx_global_stat_t           gstat;
181     t_graph                    *graph = nullptr;
182     gmx_shellfc_t              *shellfc;
183     gmx_bool                    bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
184     gmx_bool                    bTemp, bPres, bTrotter;
185     real                        dvdl_constr;
186     std::vector<RVec>           cbuf;
187     matrix                      lastbox;
188     int                         lamnew  = 0;
189     /* for FEP */
190     int                         nstfep = 0;
191     double                      cycles;
192     real                        saved_conserved_quantity = 0;
193     real                        last_ekin                = 0;
194     t_extmass                   MassQ;
195     char                        sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
196
197     /* PME load balancing data for GPU kernels */
198     gmx_bool              bPMETune         = FALSE;
199     gmx_bool              bPMETunePrinting = FALSE;
200
201     bool                  bInteractiveMDstep = false;
202
203     /* Domain decomposition could incorrectly miss a bonded
204        interaction, but checking for that requires a global
205        communication stage, which does not otherwise happen in DD
206        code. So we do that alongside the first global energy reduction
207        after a new DD is made. These variables handle whether the
208        check happens, and the result it returns. */
209     bool              shouldCheckNumberOfBondedInteractions = false;
210     int               totalNumberOfBondedInteractions       = -1;
211
212     SimulationSignals signals;
213     // Most global communnication stages don't propagate mdrun
214     // signals, and will use this object to achieve that.
215     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
216
217     if (!mdrunOptions.writeConfout)
218     {
219         // This is on by default, and the main known use case for
220         // turning it off is for convenience in benchmarking, which is
221         // something that should not show up in the general user
222         // interface.
223         GMX_LOG(mdlog.info).asParagraph().
224             appendText("The -noconfout functionality is deprecated, and may be removed in a future version.");
225     }
226
227     /* md-vv uses averaged full step velocities for T-control
228        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
229        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
230     bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
231
232     const bool bRerunMD      = false;
233
234     int        nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
235     bGStatEveryStep = (nstglobalcomm == 1);
236
237     SimulationGroups                  *groups = &top_global->groups;
238
239     std::unique_ptr<EssentialDynamics> ed = nullptr;
240     if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr)
241     {
242         /* Initialize essential dynamics sampling */
243         ed = init_edsam(mdlog,
244                         opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm),
245                         top_global,
246                         ir, cr, constr,
247                         state_global, observablesHistory,
248                         oenv,
249                         startingBehavior);
250     }
251
252     initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
253     Update     upd(ir, deform);
254     const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
255     if (startingBehavior != StartingBehavior::RestartWithAppending)
256     {
257         pleaseCiteCouplingAlgorithms(fplog, *ir);
258     }
259     gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir, top_global, oenv, wcycle,
260                                          StartingBehavior::NewSimulation);
261     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work, mdoutf_get_fp_dhdl(outf), false, mdModulesNotifier);
262
263     gstat = global_stat_init(ir);
264
265     /* Check for polarizable models and flexible constraints */
266     shellfc = init_shell_flexcon(fplog,
267                                  top_global, constr ? constr->numFlexibleConstraints() : 0,
268                                  ir->nstcalcenergy, DOMAINDECOMP(cr));
269
270     {
271         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
272         if ((io > 2000) && MASTER(cr))
273         {
274             fprintf(stderr,
275                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
276                     io);
277         }
278     }
279
280     // Local state only becomes valid now.
281     std::unique_ptr<t_state> stateInstance;
282     t_state *                state;
283
284
285     auto mdatoms = mdAtoms->mdatoms();
286
287     std::unique_ptr<UpdateConstrainCuda> integrator;
288
289     if (DOMAINDECOMP(cr))
290     {
291         dd_init_local_top(*top_global, &top);
292
293         stateInstance = std::make_unique<t_state>();
294         state         = stateInstance.get();
295         dd_init_local_state(cr->dd, state_global, state);
296
297         /* Distribute the charge groups over the nodes from the master node */
298         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
299                             state_global, *top_global, ir, imdSession,
300                             pull_work,
301                             state, &f, mdAtoms, &top, fr,
302                             vsite, constr,
303                             nrnb, nullptr, FALSE);
304         shouldCheckNumberOfBondedInteractions = true;
305         upd.setNumAtoms(state->natoms);
306     }
307     else
308     {
309         state_change_natoms(state_global, state_global->natoms);
310         f.resizeWithPadding(state_global->natoms);
311         /* Copy the pointer to the global state */
312         state = state_global;
313
314         /* Generate and initialize new topology */
315         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr,
316                                   &graph, mdAtoms, constr, vsite, shellfc);
317
318         upd.setNumAtoms(state->natoms);
319     }
320
321     if (c_useGpuUpdateConstrain)
322     {
323         GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
324         GMX_RELEASE_ASSERT(ir->etc != etcNOSEHOOVER, "Nose Hoover temperature coupling is not supported on the GPU.");
325         GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN, "Only Parrinello Rahman pressure control is supported on the GPU.");
326         GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
327         GMX_RELEASE_ASSERT(ed == nullptr, "Essential dynamics is not supported with GPU-based update constraints.");
328         GMX_LOG(mdlog.info).asParagraph().
329             appendText("Using CUDA GPU-based update and constraints module.");
330         integrator = std::make_unique<UpdateConstrainCuda>(*ir, *top_global);
331         integrator->set(top.idef, *mdatoms, ekind->ngtc);
332         t_pbc pbc;
333         set_pbc(&pbc, epbcXYZ, state->box);
334         integrator->setPbc(&pbc);
335     }
336
337     if (fr->nbv->useGpu())
338     {
339         changePinningPolicy(&state->x, gmx::PinningPolicy::PinnedIfSupported);
340     }
341
342     // NOTE: The global state is no longer used at this point.
343     // But state_global is still used as temporary storage space for writing
344     // the global state to file and potentially for replica exchange.
345     // (Global topology should persist.)
346
347     update_mdatoms(mdatoms, state->lambda[efptMASS]);
348
349     if (ir->bExpanded)
350     {
351         /* Check nstexpanded here, because the grompp check was broken */
352         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
353         {
354             gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
355         }
356         init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation,
357                                ir, state->dfhist);
358     }
359
360     if (MASTER(cr))
361     {
362         EnergyElement::initializeEnergyHistory(
363                 startingBehavior, observablesHistory, &energyOutput);
364     }
365
366     preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
367                            startingBehavior != StartingBehavior::NewSimulation);
368
369     // TODO: Remove this by converting AWH into a ForceProvider
370     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingBehavior != StartingBehavior::NewSimulation,
371                                 shellfc != nullptr,
372                                 opt2fn("-awh", nfile, fnm), pull_work);
373
374     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
375     if (useReplicaExchange && MASTER(cr))
376     {
377         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir,
378                                         replExParams);
379     }
380     /* PME tuning is only supported in the Verlet scheme, with PME for
381      * Coulomb. It is not supported with only LJ PME. */
382     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) &&
383                 !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP);
384
385     pme_load_balancing_t *pme_loadbal      = nullptr;
386     if (bPMETune)
387     {
388         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
389                          *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu(),
390                          &bPMETunePrinting);
391     }
392
393     if (!ir->bContinuation)
394     {
395         if (state->flags & (1U << estV))
396         {
397             auto v = makeArrayRef(state->v);
398             /* Set the velocities of vsites, shells and frozen atoms to zero */
399             for (i = 0; i < mdatoms->homenr; i++)
400             {
401                 if (mdatoms->ptype[i] == eptVSite ||
402                     mdatoms->ptype[i] == eptShell)
403                 {
404                     clear_rvec(v[i]);
405                 }
406                 else if (mdatoms->cFREEZE)
407                 {
408                     for (m = 0; m < DIM; m++)
409                     {
410                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
411                         {
412                             v[i][m] = 0;
413                         }
414                     }
415                 }
416             }
417         }
418
419         if (constr)
420         {
421             /* Constrain the initial coordinates and velocities */
422             do_constrain_first(fplog, constr, ir, mdatoms,
423                                state->natoms,
424                                state->x.arrayRefWithPadding(),
425                                state->v.arrayRefWithPadding(),
426                                state->box, state->lambda[efptBONDED]);
427         }
428         if (vsite)
429         {
430             /* Construct the virtual sites for the initial configuration */
431             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr,
432                              top.idef.iparams, top.idef.il,
433                              fr->ePBC, fr->bMolPBC, cr, state->box);
434         }
435     }
436
437     if (ir->efep != efepNO)
438     {
439         /* Set free energy calculation frequency as the greatest common
440          * denominator of nstdhdl and repl_ex_nst. */
441         nstfep = ir->fepvals->nstdhdl;
442         if (ir->bExpanded)
443         {
444             nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
445         }
446         if (useReplicaExchange)
447         {
448             nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
449         }
450     }
451
452     /* Be REALLY careful about what flags you set here. You CANNOT assume
453      * this is the first step, since we might be restarting from a checkpoint,
454      * and in that case we should not do any modifications to the state.
455      */
456     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
457
458     // When restarting from a checkpoint, it can be appropriate to
459     // initialize ekind from quantities in the checkpoint. Otherwise,
460     // compute_globals must initialize ekind before the simulation
461     // starts/restarts. However, only the master rank knows what was
462     // found in the checkpoint file, so we have to communicate in
463     // order to coordinate the restart.
464     //
465     // TODO Consider removing this communication if/when checkpoint
466     // reading directly follows .tpr reading, because all ranks can
467     // agree on hasReadEkinState at that time.
468     bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
469     if (PAR(cr))
470     {
471         gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
472     }
473     if (hasReadEkinState)
474     {
475         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
476     }
477
478     unsigned int cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
479                                | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
480                                | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
481                                | (hasReadEkinState ? CGLO_READEKIN : 0));
482
483     bSumEkinhOld = FALSE;
484
485     t_vcm vcm(top_global->groups, *ir);
486     reportComRemovalInfo(fplog, vcm);
487
488     /* To minimize communication, compute_globals computes the COM velocity
489      * and the kinetic energy for the velocities without COM motion removed.
490      * Thus to get the kinetic energy without the COM contribution, we need
491      * to call compute_globals twice.
492      */
493     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
494     {
495         unsigned int cglo_flags_iteration = cglo_flags;
496         if (bStopCM && cgloIteration == 0)
497         {
498             cglo_flags_iteration |= CGLO_STOPCM;
499             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
500         }
501         compute_globals(gstat, cr, ir, fr, ekind,
502                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
503                         mdatoms, nrnb, &vcm,
504                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
505                         constr, &nullSignaller, state->box,
506                         &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration
507                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
508         if (cglo_flags_iteration & CGLO_STOPCM)
509         {
510             /* At initialization, do not pass x with acceleration-correction mode
511              * to avoid (incorrect) correction of the initial coordinates.
512              */
513             rvec *xPtr = nullptr;
514             if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
515             {
516                 xPtr = state->x.rvec_array();
517             }
518             process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
519             inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
520         }
521     }
522     checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
523                                     top_global, &top, state->x.rvec_array(), state->box,
524                                     &shouldCheckNumberOfBondedInteractions);
525     if (ir->eI == eiVVAK)
526     {
527         /* a second call to get the half step temperature initialized as well */
528         /* we do the same call as above, but turn the pressure off -- internally to
529            compute_globals, this is recognized as a velocity verlet half-step
530            kinetic energy calculation.  This minimized excess variables, but
531            perhaps loses some logic?*/
532
533         compute_globals(gstat, cr, ir, fr, ekind,
534                         state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
535                         mdatoms, nrnb, &vcm,
536                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
537                         constr, &nullSignaller, state->box,
538                         nullptr, &bSumEkinhOld,
539                         cglo_flags & ~CGLO_PRESSURE);
540     }
541
542     /* Calculate the initial half step temperature, and save the ekinh_old */
543     if (startingBehavior == StartingBehavior::NewSimulation)
544     {
545         for (i = 0; (i < ir->opts.ngtc); i++)
546         {
547             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
548         }
549     }
550
551     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
552        temperature control */
553     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
554
555     if (MASTER(cr))
556     {
557         if (!ir->bContinuation)
558         {
559             if (constr && ir->eConstrAlg == econtLINCS)
560             {
561                 fprintf(fplog,
562                         "RMS relative constraint deviation after constraining: %.2e\n",
563                         constr->rmsd());
564             }
565             if (EI_STATE_VELOCITY(ir->eI))
566             {
567                 real temp = enerd->term[F_TEMP];
568                 if (ir->eI != eiVV)
569                 {
570                     /* Result of Ekin averaged over velocities of -half
571                      * and +half step, while we only have -half step here.
572                      */
573                     temp *= 2;
574                 }
575                 fprintf(fplog, "Initial temperature: %g K\n", temp);
576             }
577         }
578
579         char tbuf[20];
580         fprintf(stderr, "starting mdrun '%s'\n",
581                 *(top_global->name));
582         if (ir->nsteps >= 0)
583         {
584             sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
585         }
586         else
587         {
588             sprintf(tbuf, "%s", "infinite");
589         }
590         if (ir->init_step > 0)
591         {
592             fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
593                     gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
594                     gmx_step_str(ir->init_step, sbuf2),
595                     ir->init_step*ir->delta_t);
596         }
597         else
598         {
599             fprintf(stderr, "%s steps, %s ps.\n",
600                     gmx_step_str(ir->nsteps, sbuf), tbuf);
601         }
602         fprintf(fplog, "\n");
603     }
604
605     walltime_accounting_start_time(walltime_accounting);
606     wallcycle_start(wcycle, ewcRUN);
607     print_start(fplog, cr, walltime_accounting, "mdrun");
608
609 #if GMX_FAHCORE
610     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
611     int chkpt_ret = fcCheckPointParallel( cr->nodeid,
612                                           NULL, 0);
613     if (chkpt_ret == 0)
614     {
615         gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
616     }
617 #endif
618
619     /***********************************************************
620      *
621      *             Loop over MD steps
622      *
623      ************************************************************/
624
625     bFirstStep       = TRUE;
626     /* Skip the first Nose-Hoover integration when we get the state from tpx */
627     bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
628     bSumEkinhOld     = FALSE;
629     bExchanged       = FALSE;
630     bNeedRepartition = FALSE;
631
632     bool simulationsShareState = false;
633     int  nstSignalComm         = nstglobalcomm;
634     {
635         // TODO This implementation of ensemble orientation restraints is nasty because
636         // a user can't just do multi-sim with single-sim orientation restraints.
637         bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
638         bool awhUsesMultiSim         = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
639
640         // Replica exchange, ensemble restraints and AWH need all
641         // simulations to remain synchronized, so they need
642         // checkpoints and stop conditions to act on the same step, so
643         // the propagation of such signals must take place between
644         // simulations, not just within simulations.
645         // TODO: Make algorithm initializers set these flags.
646         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
647
648         if (simulationsShareState)
649         {
650             // Inter-simulation signal communication does not need to happen
651             // often, so we use a minimum of 200 steps to reduce overhead.
652             const int c_minimumInterSimulationSignallingInterval = 200;
653             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm;
654         }
655     }
656
657     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
658                 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
659                 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
660                 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
661
662     auto checkpointHandler = std::make_unique<CheckpointHandler>(
663                 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
664                 simulationsShareState, ir->nstlist == 0, MASTER(cr),
665                 mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period);
666
667     const bool resetCountersIsLocal = true;
668     auto       resetHandler         = std::make_unique<ResetHandler>(
669                 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal,
670                 ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
671                 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
672
673     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
674
675     step     = ir->init_step;
676     step_rel = 0;
677
678     // TODO extract this to new multi-simulation module
679     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
680     {
681         if (!multisim_int_all_are_equal(ms, ir->nsteps))
682         {
683             GMX_LOG(mdlog.warning).appendText(
684                     "Note: The number of steps is not consistent across multi simulations,\n"
685                     "but we are proceeding anyway!");
686         }
687         if (!multisim_int_all_are_equal(ms, ir->init_step))
688         {
689             GMX_LOG(mdlog.warning).appendText(
690                     "Note: The initial step is not consistent across multi simulations,\n"
691                     "but we are proceeding anyway!");
692         }
693     }
694
695     /* and stop now if we should */
696     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
697     while (!bLastStep)
698     {
699
700         /* Determine if this is a neighbor search step */
701         bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
702
703         if (bPMETune && bNStList)
704         {
705             /* PME grid + cut-off optimization with GPUs or PME nodes */
706             pme_loadbal_do(pme_loadbal, cr,
707                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
708                            fplog, mdlog,
709                            *ir, fr, state->box, state->x,
710                            wcycle,
711                            step, step_rel,
712                            &bPMETunePrinting);
713         }
714
715         wallcycle_start(wcycle, ewcSTEP);
716
717         bLastStep = (step_rel == ir->nsteps);
718         t         = t0 + step*ir->delta_t;
719
720         // TODO Refactor this, so that nstfep does not need a default value of zero
721         if (ir->efep != efepNO || ir->bSimTemp)
722         {
723             /* find and set the current lambdas */
724             setCurrentLambdasLocal(step, ir->fepvals, lam0, state->lambda, state->fep_state);
725
726             bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
727             bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
728             bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
729                             && (ir->bExpanded) && (step > 0) &&
730                             (startingBehavior == StartingBehavior::NewSimulation));
731         }
732
733         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep &&
734                      do_per_step(step, replExParams.exchangeInterval));
735
736         if (doSimulatedAnnealing)
737         {
738             update_annealing_target_temp(ir, t, &upd);
739         }
740
741         /* Stop Center of Mass motion */
742         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
743
744         /* Determine whether or not to do Neighbour Searching */
745         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
746
747         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
748
749         /* do_log triggers energy and virial calculation. Because this leads
750          * to different code paths, forces can be different. Thus for exact
751          * continuation we should avoid extra log output.
752          * Note that the || bLastStep can result in non-exact continuation
753          * beyond the last step. But we don't consider that to be an issue.
754          */
755         do_log     =
756             (do_per_step(step, ir->nstlog) ||
757              (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) ||
758              bLastStep);
759         do_verbose = mdrunOptions.verbose &&
760             (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
761
762         if (bNS && !(bFirstStep && ir->bContinuation))
763         {
764             bMasterState = FALSE;
765             /* Correct the new box if it is too skewed */
766             if (inputrecDynamicBox(ir))
767             {
768                 if (correct_box(fplog, step, state->box, graph))
769                 {
770                     bMasterState = TRUE;
771                 }
772             }
773             if (DOMAINDECOMP(cr) && bMasterState)
774             {
775                 dd_collect_state(cr->dd, state, state_global);
776             }
777
778             if (DOMAINDECOMP(cr))
779             {
780                 /* Repartition the domain decomposition */
781                 dd_partition_system(fplog, mdlog, step, cr,
782                                     bMasterState, nstglobalcomm,
783                                     state_global, *top_global, ir, imdSession,
784                                     pull_work,
785                                     state, &f, mdAtoms, &top, fr,
786                                     vsite, constr,
787                                     nrnb, wcycle,
788                                     do_verbose && !bPMETunePrinting);
789                 shouldCheckNumberOfBondedInteractions = true;
790                 upd.setNumAtoms(state->natoms);
791             }
792         }
793
794         if (MASTER(cr) && do_log)
795         {
796             energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
797         }
798
799         if (ir->efep != efepNO)
800         {
801             update_mdatoms(mdatoms, state->lambda[efptMASS]);
802         }
803
804         if (bExchanged)
805         {
806
807             /* We need the kinetic energy at minus the half step for determining
808              * the full step kinetic energy and possibly for T-coupling.*/
809             /* This may not be quite working correctly yet . . . . */
810             compute_globals(gstat, cr, ir, fr, ekind,
811                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
812                             mdatoms, nrnb, &vcm,
813                             wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
814                             constr, &nullSignaller, state->box,
815                             &totalNumberOfBondedInteractions, &bSumEkinhOld,
816                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
817             checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
818                                             top_global, &top, state->x.rvec_array(), state->box,
819                                             &shouldCheckNumberOfBondedInteractions);
820         }
821         clear_mat(force_vir);
822
823         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
824
825         /* Determine the energy and pressure:
826          * at nstcalcenergy steps and at energy output steps (set below).
827          */
828         if (EI_VV(ir->eI) && (!bInitStep))
829         {
830             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
831             bCalcVir      = bCalcEnerStep ||
832                 (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
833         }
834         else
835         {
836             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
837             bCalcVir      = bCalcEnerStep ||
838                 (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
839         }
840         bCalcEner = bCalcEnerStep;
841
842         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
843
844         if (do_ene || do_log || bDoReplEx)
845         {
846             bCalcVir  = TRUE;
847             bCalcEner = TRUE;
848         }
849
850         /* Do we need global communication ? */
851         bGStat = (bCalcVir || bCalcEner || bStopCM ||
852                   do_per_step(step, nstglobalcomm) ||
853                   (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
854
855         force_flags = (GMX_FORCE_STATECHANGED |
856                        ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) |
857                        GMX_FORCE_ALLFORCES |
858                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
859                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
860                        (bDoFEP ? GMX_FORCE_DHDL : 0)
861                        );
862
863         if (shellfc)
864         {
865             /* Now is the time to relax the shells */
866             relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose,
867                                 enforcedRotation, step,
868                                 ir, imdSession, pull_work, bNS, force_flags, &top,
869                                 constr, enerd, fcd,
870                                 state->natoms,
871                                 state->x.arrayRefWithPadding(),
872                                 state->v.arrayRefWithPadding(),
873                                 state->box,
874                                 state->lambda,
875                                 &state->hist,
876                                 f.arrayRefWithPadding(), force_vir, mdatoms,
877                                 nrnb, wcycle, graph,
878                                 shellfc, fr, runScheduleWork, t, mu_tot,
879                                 vsite,
880                                 ddBalanceRegionHandler);
881         }
882         else
883         {
884             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated
885                (or the AWH update will be performed twice for one step when continuing). It would be best to
886                call this update function from do_md_trajectory_writing but that would occur after do_force.
887                One would have to divide the update_awh function into one function applying the AWH force
888                and one doing the AWH bias update. The update AWH bias function could then be called after
889                do_md_trajectory_writing (then containing update_awh_history).
890                The checkpointing will in the future probably moved to the start of the md loop which will
891                rid of this issue. */
892             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
893             {
894                 awh->updateHistory(state_global->awhHistory.get());
895             }
896
897             /* The coordinates (x) are shifted (to get whole molecules)
898              * in do_force.
899              * This is parallellized as well, and does communication too.
900              * Check comments in sim_util.c
901              */
902             do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession,
903                      pull_work,
904                      step, nrnb, wcycle, &top,
905                      state->box, state->x.arrayRefWithPadding(), &state->hist,
906                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
907                      state->lambda, graph,
908                      fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
909                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
910                      ddBalanceRegionHandler);
911         }
912
913         // VV integrators do not need the following velocity half step
914         // if it is the first step after starting from a checkpoint.
915         // That is, the half step is needed on all other steps, and
916         // also the first step when starting from a .tpr file.
917         if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
918         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
919         {
920             rvec *vbuf = nullptr;
921
922             wallcycle_start(wcycle, ewcUPDATE);
923             if (ir->eI == eiVV && bInitStep)
924             {
925                 /* if using velocity verlet with full time step Ekin,
926                  * take the first half step only to compute the
927                  * virial for the first step. From there,
928                  * revert back to the initial coordinates
929                  * so that the input is actually the initial step.
930                  */
931                 snew(vbuf, state->natoms);
932                 copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */
933             }
934             else
935             {
936                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
937                 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
938             }
939
940             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
941                           ekind, M, &upd, etrtVELOCITY1,
942                           cr, constr);
943
944             wallcycle_stop(wcycle, ewcUPDATE);
945             constrain_velocities(step, nullptr,
946                                  state,
947                                  shake_vir,
948                                  constr,
949                                  bCalcVir, do_log, do_ene);
950             wallcycle_start(wcycle, ewcUPDATE);
951             /* if VV, compute the pressure and constraints */
952             /* For VV2, we strictly only need this if using pressure
953              * control, but we really would like to have accurate pressures
954              * printed out.
955              * Think about ways around this in the future?
956              * For now, keep this choice in comments.
957              */
958             /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
959             /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
960             bPres = TRUE;
961             bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
962             if (bCalcEner && ir->eI == eiVVAK)
963             {
964                 bSumEkinhOld = TRUE;
965             }
966             /* for vv, the first half of the integration actually corresponds to the previous step.
967                So we need information from the last step in the first half of the integration */
968             if (bGStat || do_per_step(step-1, nstglobalcomm))
969             {
970                 wallcycle_stop(wcycle, ewcUPDATE);
971                 compute_globals(gstat, cr, ir, fr, ekind,
972                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
973                                 mdatoms, nrnb, &vcm,
974                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
975                                 constr, &nullSignaller, state->box,
976                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
977                                 (bGStat ? CGLO_GSTAT : 0)
978                                 | (bCalcEner ? CGLO_ENERGY : 0)
979                                 | (bTemp ? CGLO_TEMPERATURE : 0)
980                                 | (bPres ? CGLO_PRESSURE : 0)
981                                 | (bPres ? CGLO_CONSTRAINT : 0)
982                                 | (bStopCM ? CGLO_STOPCM : 0)
983                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
984                                 | CGLO_SCALEEKIN
985                                 );
986                 /* explanation of above:
987                    a) We compute Ekin at the full time step
988                    if 1) we are using the AveVel Ekin, and it's not the
989                    initial step, or 2) if we are using AveEkin, but need the full
990                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
991                    b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
992                    EkinAveVel because it's needed for the pressure */
993                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
994                                                 top_global, &top, state->x.rvec_array(), state->box,
995                                                 &shouldCheckNumberOfBondedInteractions);
996                 if (bStopCM)
997                 {
998                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
999                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1000                 }
1001                 wallcycle_start(wcycle, ewcUPDATE);
1002             }
1003             /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1004             if (!bInitStep)
1005             {
1006                 if (bTrotter)
1007                 {
1008                     m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
1009                     trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
1010
1011                     /* TODO This is only needed when we're about to write
1012                      * a checkpoint, because we use it after the restart
1013                      * (in a kludge?). But what should we be doing if
1014                      * the startingBehavior is NewSimulation or bInitStep are true? */
1015                     if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1016                     {
1017                         copy_mat(shake_vir, state->svir_prev);
1018                         copy_mat(force_vir, state->fvir_prev);
1019                     }
1020                     if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
1021                     {
1022                         /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1023                         enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1024                         enerd->term[F_EKIN] = trace(ekind->ekin);
1025                     }
1026                 }
1027                 else if (bExchanged)
1028                 {
1029                     wallcycle_stop(wcycle, ewcUPDATE);
1030                     /* We need the kinetic energy at minus the half step for determining
1031                      * the full step kinetic energy and possibly for T-coupling.*/
1032                     /* This may not be quite working correctly yet . . . . */
1033                     compute_globals(gstat, cr, ir, fr, ekind,
1034                                     state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1035                                     mdatoms, nrnb, &vcm,
1036                                     wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
1037                                     constr, &nullSignaller, state->box,
1038                                     nullptr, &bSumEkinhOld,
1039                                     CGLO_GSTAT | CGLO_TEMPERATURE);
1040                     wallcycle_start(wcycle, ewcUPDATE);
1041                 }
1042             }
1043             /* if it's the initial step, we performed this first step just to get the constraint virial */
1044             if (ir->eI == eiVV && bInitStep)
1045             {
1046                 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1047                 sfree(vbuf);
1048             }
1049             wallcycle_stop(wcycle, ewcUPDATE);
1050         }
1051
1052         /* compute the conserved quantity */
1053         if (EI_VV(ir->eI))
1054         {
1055             saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1056             if (ir->eI == eiVV)
1057             {
1058                 last_ekin = enerd->term[F_EKIN];
1059             }
1060             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1061             {
1062                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1063             }
1064             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1065             if (ir->efep != efepNO)
1066             {
1067                 sum_dhdl(enerd, state->lambda, *ir->fepvals);
1068             }
1069         }
1070
1071         /* ########  END FIRST UPDATE STEP  ############## */
1072         /* ########  If doing VV, we now have v(dt) ###### */
1073         if (bDoExpanded)
1074         {
1075             /* perform extended ensemble sampling in lambda - we don't
1076                actually move to the new state before outputting
1077                statistics, but if performing simulated tempering, we
1078                do update the velocities and the tau_t. */
1079
1080             lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms);
1081             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1082             if (MASTER(cr))
1083             {
1084                 copy_df_history(state_global->dfhist, state->dfhist);
1085             }
1086         }
1087
1088         /* Now we have the energies and forces corresponding to the
1089          * coordinates at time t. We must output all of this before
1090          * the update.
1091          */
1092         do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
1093                                  ir, state, state_global, observablesHistory,
1094                                  top_global, fr,
1095                                  outf, energyOutput, ekind, f,
1096                                  checkpointHandler->isCheckpointingStep(),
1097                                  bRerunMD, bLastStep,
1098                                  mdrunOptions.writeConfout,
1099                                  bSumEkinhOld);
1100         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1101         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1102
1103         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1104         if (startingBehavior != StartingBehavior::NewSimulation &&
1105             (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1106         {
1107             copy_mat(state->svir_prev, shake_vir);
1108             copy_mat(state->fvir_prev, force_vir);
1109         }
1110
1111         stopHandler->setSignal();
1112         resetHandler->setSignal(walltime_accounting);
1113
1114         if (bGStat || !PAR(cr))
1115         {
1116             /* In parallel we only have to check for checkpointing in steps
1117              * where we do global communication,
1118              *  otherwise the other nodes don't know.
1119              */
1120             checkpointHandler->setSignal(walltime_accounting);
1121         }
1122
1123         /* #########   START SECOND UPDATE STEP ################# */
1124
1125         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
1126            in preprocessing */
1127
1128         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1129         {
1130             gmx_bool bIfRandomize;
1131             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1132             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1133             if (constr && bIfRandomize)
1134             {
1135                 constrain_velocities(step, nullptr,
1136                                      state,
1137                                      tmp_vir,
1138                                      constr,
1139                                      bCalcVir, do_log, do_ene);
1140             }
1141         }
1142         /* Box is changed in update() when we do pressure coupling,
1143          * but we should still use the old box for energy corrections and when
1144          * writing it to the energy file, so it matches the trajectory files for
1145          * the same timestep above. Make a copy in a separate array.
1146          */
1147         copy_mat(state->box, lastbox);
1148
1149         dvdl_constr = 0;
1150
1151         wallcycle_start(wcycle, ewcUPDATE);
1152         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1153         if (bTrotter)
1154         {
1155             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1156             /* We can only do Berendsen coupling after we have summed
1157              * the kinetic energy or virial. Since the happens
1158              * in global_state after update, we should only do it at
1159              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1160              */
1161         }
1162         else
1163         {
1164             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1165             update_pcouple_before_coordinates(fplog, step, ir, state,
1166                                               parrinellorahmanMu, M,
1167                                               bInitStep);
1168         }
1169
1170         if (EI_VV(ir->eI))
1171         {
1172             /* velocity half-step update */
1173             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1174                           ekind, M, &upd, etrtVELOCITY2,
1175                           cr, constr);
1176         }
1177
1178         /* Above, initialize just copies ekinh into ekin,
1179          * it doesn't copy position (for VV),
1180          * and entire integrator for MD.
1181          */
1182
1183         if (ir->eI == eiVVAK)
1184         {
1185             cbuf.resize(state->x.size());
1186             std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1187         }
1188
1189         if (c_useGpuUpdateConstrain)
1190         {
1191             if (bNS)
1192             {
1193                 integrator->set(top.idef, *mdatoms, ekind->ngtc);
1194                 t_pbc pbc;
1195                 set_pbc(&pbc, epbcXYZ, state->box);
1196                 integrator->setPbc(&pbc);
1197             }
1198             integrator->copyCoordinatesToGpu(state->x.rvec_array());
1199             integrator->copyVelocitiesToGpu(state->v.rvec_array());
1200             integrator->copyForcesToGpu(as_rvec_array(f.data()));
1201
1202             bool doTempCouple     = (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1203             bool doPressureCouple = (ir->epc == epcPARRINELLORAHMAN && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1204
1205             // This applies Leap-Frog, LINCS and SETTLE in succession
1206             integrator->integrate(ir->delta_t, true, bCalcVir, shake_vir,
1207                                   doTempCouple, ekind->tcstat,
1208                                   doPressureCouple, ir->nstpcouple*ir->delta_t, M);
1209
1210             integrator->copyCoordinatesFromGpu(state->x.rvec_array());
1211             integrator->copyVelocitiesFromGpu(state->v.rvec_array());
1212         }
1213         else
1214         {
1215             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1216                           ekind, M, &upd, etrtPOSITION, cr, constr);
1217
1218             wallcycle_stop(wcycle, ewcUPDATE);
1219
1220             constrain_coordinates(step, &dvdl_constr, state,
1221                                   shake_vir,
1222                                   &upd, constr,
1223                                   bCalcVir, do_log, do_ene);
1224
1225             update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
1226                                   cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
1227             finish_update(ir, mdatoms,
1228                           state, graph,
1229                           nrnb, wcycle, &upd, constr);
1230         }
1231
1232         if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1233         {
1234             updatePrevStepPullCom(pull_work, state);
1235         }
1236
1237         if (ir->eI == eiVVAK)
1238         {
1239             /* erase F_EKIN and F_TEMP here? */
1240             /* just compute the kinetic energy at the half step to perform a trotter step */
1241             compute_globals(gstat, cr, ir, fr, ekind,
1242                             state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1243                             mdatoms, nrnb, &vcm,
1244                             wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1245                             constr, &nullSignaller, lastbox,
1246                             nullptr, &bSumEkinhOld,
1247                             (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
1248                             );
1249             wallcycle_start(wcycle, ewcUPDATE);
1250             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1251             /* now we know the scaling, we can compute the positions again */
1252             std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1253
1254             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1255                           ekind, M, &upd, etrtPOSITION, cr, constr);
1256             wallcycle_stop(wcycle, ewcUPDATE);
1257
1258             /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1259             /* are the small terms in the shake_vir here due
1260              * to numerical errors, or are they important
1261              * physically? I'm thinking they are just errors, but not completely sure.
1262              * For now, will call without actually constraining, constr=NULL*/
1263             finish_update(ir, mdatoms,
1264                           state, graph,
1265                           nrnb, wcycle, &upd, nullptr);
1266         }
1267         if (EI_VV(ir->eI))
1268         {
1269             /* this factor or 2 correction is necessary
1270                because half of the constraint force is removed
1271                in the vv step, so we have to double it.  See
1272                the Redmine issue #1255.  It is not yet clear
1273                if the factor of 2 is exact, or just a very
1274                good approximation, and this will be
1275                investigated.  The next step is to see if this
1276                can be done adding a dhdl contribution from the
1277                rattle step, but this is somewhat more
1278                complicated with the current code. Will be
1279                investigated, hopefully for 4.6.3. However,
1280                this current solution is much better than
1281                having it completely wrong.
1282              */
1283             enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
1284         }
1285         else
1286         {
1287             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1288         }
1289
1290         if (vsite != nullptr)
1291         {
1292             wallcycle_start(wcycle, ewcVSITECONSTR);
1293             if (graph != nullptr)
1294             {
1295                 shift_self(graph, state->box, state->x.rvec_array());
1296             }
1297             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1298                              top.idef.iparams, top.idef.il,
1299                              fr->ePBC, fr->bMolPBC, cr, state->box);
1300
1301             if (graph != nullptr)
1302             {
1303                 unshift_self(graph, state->box, state->x.rvec_array());
1304             }
1305             wallcycle_stop(wcycle, ewcVSITECONSTR);
1306         }
1307
1308         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1309         /* With Leap-Frog we can skip compute_globals at
1310          * non-communication steps, but we need to calculate
1311          * the kinetic energy one step before communication.
1312          */
1313         {
1314             // Organize to do inter-simulation signalling on steps if
1315             // and when algorithms require it.
1316             bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1317
1318             if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
1319             {
1320                 // Since we're already communicating at this step, we
1321                 // can propagate intra-simulation signals. Note that
1322                 // check_nstglobalcomm has the responsibility for
1323                 // choosing the value of nstglobalcomm that is one way
1324                 // bGStat becomes true, so we can't get into a
1325                 // situation where e.g. checkpointing can't be
1326                 // signalled.
1327                 bool                doIntraSimSignal = true;
1328                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1329
1330                 compute_globals(gstat, cr, ir, fr, ekind,
1331                                 state->x.rvec_array(), state->v.rvec_array(), state->box, state->lambda[efptVDW],
1332                                 mdatoms, nrnb, &vcm,
1333                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1334                                 constr, &signaller,
1335                                 lastbox,
1336                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1337                                 (bGStat ? CGLO_GSTAT : 0)
1338                                 | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1339                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1340                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1341                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
1342                                 | CGLO_CONSTRAINT
1343                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1344                                 );
1345                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1346                                                 top_global, &top, state->x.rvec_array(), state->box,
1347                                                 &shouldCheckNumberOfBondedInteractions);
1348                 if (!EI_VV(ir->eI) && bStopCM)
1349                 {
1350                     process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(), state->v.rvec_array());
1351                     inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1352                 }
1353             }
1354         }
1355
1356         /* #############  END CALC EKIN AND PRESSURE ################# */
1357
1358         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1359            the virial that should probably be addressed eventually. state->veta has better properies,
1360            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1361            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1362
1363         if (ir->efep != efepNO && !EI_VV(ir->eI))
1364         {
1365             /* Sum up the foreign energy and dhdl terms for md and sd.
1366                Currently done every step so that dhdl is correct in the .edr */
1367             sum_dhdl(enerd, state->lambda, *ir->fepvals);
1368         }
1369
1370         update_pcouple_after_coordinates(fplog, step, ir, mdatoms,
1371                                          pres, force_vir, shake_vir,
1372                                          parrinellorahmanMu,
1373                                          state, nrnb, &upd);
1374
1375         /* ################# END UPDATE STEP 2 ################# */
1376         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1377
1378         /* The coordinates (x) were unshifted in update */
1379         if (!bGStat)
1380         {
1381             /* We will not sum ekinh_old,
1382              * so signal that we still have to do it.
1383              */
1384             bSumEkinhOld = TRUE;
1385         }
1386
1387         if (bCalcEner)
1388         {
1389             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1390
1391             /* use the directly determined last velocity, not actually the averaged half steps */
1392             if (bTrotter && ir->eI == eiVV)
1393             {
1394                 enerd->term[F_EKIN] = last_ekin;
1395             }
1396             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1397
1398             if (integratorHasConservedEnergyQuantity(ir))
1399             {
1400                 if (EI_VV(ir->eI))
1401                 {
1402                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1403                 }
1404                 else
1405                 {
1406                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1407                 }
1408             }
1409             /* #########  END PREPARING EDR OUTPUT  ###########  */
1410         }
1411
1412         /* Output stuff */
1413         if (MASTER(cr))
1414         {
1415             if (fplog && do_log && bDoExpanded)
1416             {
1417                 /* only needed if doing expanded ensemble */
1418                 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr,
1419                                           state_global->dfhist, state->fep_state, ir->nstlog, step);
1420             }
1421             if (bCalcEner)
1422             {
1423                 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep,
1424                                                  t, mdatoms->tmass, enerd, state,
1425                                                  ir->fepvals, ir->expandedvals, lastbox,
1426                                                  shake_vir, force_vir, total_vir, pres,
1427                                                  ekind, mu_tot, constr);
1428             }
1429             else
1430             {
1431                 energyOutput.recordNonEnergyStep();
1432             }
1433
1434             gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
1435             gmx_bool do_or  = do_per_step(step, ir->nstorireout);
1436
1437             if (doSimulatedAnnealing)
1438             {
1439                 energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
1440             }
1441             if (do_log || do_ene || do_dr || do_or)
1442             {
1443                 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1444                                                    do_log ? fplog : nullptr,
1445                                                    step, t,
1446                                                    fcd, awh.get());
1447             }
1448
1449             if (ir->bPull)
1450             {
1451                 pull_print_output(pull_work, step, t);
1452             }
1453
1454             if (do_per_step(step, ir->nstlog))
1455             {
1456                 if (fflush(fplog) != 0)
1457                 {
1458                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1459                 }
1460             }
1461         }
1462         if (bDoExpanded)
1463         {
1464             /* Have to do this part _after_ outputting the logfile and the edr file */
1465             /* Gets written into the state at the beginning of next loop*/
1466             state->fep_state = lamnew;
1467         }
1468         /* Print the remaining wall clock time for the run */
1469         if (isMasterSimMasterRank(ms, MASTER(cr)) &&
1470             (do_verbose || gmx_got_usr_signal()) &&
1471             !bPMETunePrinting)
1472         {
1473             if (shellfc)
1474             {
1475                 fprintf(stderr, "\n");
1476             }
1477             print_time(stderr, walltime_accounting, step, ir, cr);
1478         }
1479
1480         /* Ion/water position swapping.
1481          * Not done in last step since trajectory writing happens before this call
1482          * in the MD loop and exchanges would be lost anyway. */
1483         bNeedRepartition = FALSE;
1484         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
1485             do_per_step(step, ir->swap->nstswap))
1486         {
1487             bNeedRepartition = do_swapcoords(cr, step, t, ir, swap, wcycle,
1488                                              as_rvec_array(state->x.data()),
1489                                              state->box,
1490                                              MASTER(cr) && mdrunOptions.verbose,
1491                                              bRerunMD);
1492
1493             if (bNeedRepartition && DOMAINDECOMP(cr))
1494             {
1495                 dd_collect_state(cr->dd, state, state_global);
1496             }
1497         }
1498
1499         /* Replica exchange */
1500         bExchanged = FALSE;
1501         if (bDoReplEx)
1502         {
1503             bExchanged = replica_exchange(fplog, cr, ms, repl_ex,
1504                                           state_global, enerd,
1505                                           state, step, t);
1506         }
1507
1508         if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
1509         {
1510             dd_partition_system(fplog, mdlog, step, cr, TRUE, 1,
1511                                 state_global, *top_global, ir, imdSession,
1512                                 pull_work,
1513                                 state, &f, mdAtoms, &top, fr,
1514                                 vsite, constr,
1515                                 nrnb, wcycle, FALSE);
1516             shouldCheckNumberOfBondedInteractions = true;
1517             upd.setNumAtoms(state->natoms);
1518         }
1519
1520         bFirstStep             = FALSE;
1521         bInitStep              = FALSE;
1522
1523         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1524         /* With all integrators, except VV, we need to retain the pressure
1525          * at the current step for coupling at the next step.
1526          */
1527         if ((state->flags & (1U<<estPRES_PREV)) &&
1528             (bGStatEveryStep ||
1529              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1530         {
1531             /* Store the pressure in t_state for pressure coupling
1532              * at the next MD step.
1533              */
1534             copy_mat(pres, state->pres_prev);
1535         }
1536
1537         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1538
1539         if ( (membed != nullptr) && (!bLastStep) )
1540         {
1541             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1542         }
1543
1544         cycles = wallcycle_stop(wcycle, ewcSTEP);
1545         if (DOMAINDECOMP(cr) && wcycle)
1546         {
1547             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1548         }
1549
1550         /* increase the MD step number */
1551         step++;
1552         step_rel++;
1553
1554         resetHandler->resetCounters(
1555                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(),
1556                 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1557
1558         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1559         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1560
1561     }
1562     /* End of main MD loop */
1563
1564     /* Closing TNG files can include compressing data. Therefore it is good to do that
1565      * before stopping the time measurements. */
1566     mdoutf_tng_close(outf);
1567
1568     /* Stop measuring walltime */
1569     walltime_accounting_end_time(walltime_accounting);
1570
1571     if (!thisRankHasDuty(cr, DUTY_PME))
1572     {
1573         /* Tell the PME only node to finish */
1574         gmx_pme_send_finish(cr);
1575     }
1576
1577     if (MASTER(cr))
1578     {
1579         if (ir->nstcalcenergy > 0)
1580         {
1581             energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
1582             energyOutput.printAverages(fplog, groups);
1583         }
1584     }
1585     done_mdoutf(outf);
1586
1587     if (bPMETune)
1588     {
1589         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1590     }
1591
1592     done_shellfc(fplog, shellfc, step_rel);
1593
1594     if (useReplicaExchange && MASTER(cr))
1595     {
1596         print_replica_exchange_statistics(fplog, repl_ex);
1597     }
1598
1599     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1600
1601     global_stat_destroy(gstat);
1602
1603 }