src/gromacs/mdrun/md.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief Implements the integrator for normal molecular dynamics simulations
  40  *
  41  * \author David van der Spoel <david.vanderspoel@icm.uu.se>
  42  * \ingroup module_mdrun
  43  */
  44 #include "gmxpre.h"
  45
  46 #include <cinttypes>
  47 #include <cmath>
  48 #include <cstdio>
  49 #include <cstdlib>
  50
  51 #include <algorithm>
  52 #include <memory>
  53
  54 #include "gromacs/awh/awh.h"
  55 #include "gromacs/commandline/filenm.h"
  56 #include "gromacs/domdec/collect.h"
  57 #include "gromacs/domdec/dlbtiming.h"
  58 #include "gromacs/domdec/domdec.h"
  59 #include "gromacs/domdec/domdec_network.h"
  60 #include "gromacs/domdec/domdec_struct.h"
  61 #include "gromacs/domdec/mdsetup.h"
  62 #include "gromacs/domdec/partition.h"
  63 #include "gromacs/essentialdynamics/edsam.h"
  64 #include "gromacs/ewald/pme.h"
  65 #include "gromacs/ewald/pme_load_balancing.h"
  66 #include "gromacs/fileio/trxio.h"
  67 #include "gromacs/gmxlib/network.h"
  68 #include "gromacs/gmxlib/nrnb.h"
  69 #include "gromacs/gpu_utils/gpu_utils.h"
  70 #include "gromacs/imd/imd.h"
  71 #include "gromacs/listed_forces/manage_threading.h"
  72 #include "gromacs/math/functions.h"
  73 #include "gromacs/math/utilities.h"
  74 #include "gromacs/math/vec.h"
  75 #include "gromacs/math/vectypes.h"
  76 #include "gromacs/mdlib/checkpointhandler.h"
  77 #include "gromacs/mdlib/compute_io.h"
  78 #include "gromacs/mdlib/constr.h"
  79 #include "gromacs/mdlib/ebin.h"
  80 #include "gromacs/mdlib/enerdata_utils.h"
  81 #include "gromacs/mdlib/energyoutput.h"
  82 #include "gromacs/mdlib/expanded.h"
  83 #include "gromacs/mdlib/force.h"
  84 #include "gromacs/mdlib/force_flags.h"
  85 #include "gromacs/mdlib/forcerec.h"
  86 #include "gromacs/mdlib/md_support.h"
  87 #include "gromacs/mdlib/mdatoms.h"
  88 #include "gromacs/mdlib/mdoutf.h"
  89 #include "gromacs/mdlib/membed.h"
  90 #include "gromacs/mdlib/resethandler.h"
  91 #include "gromacs/mdlib/sighandler.h"
  92 #include "gromacs/mdlib/simulationsignal.h"
  93 #include "gromacs/mdlib/stat.h"
  94 #include "gromacs/mdlib/stophandler.h"
  95 #include "gromacs/mdlib/tgroup.h"
  96 #include "gromacs/mdlib/trajectory_writing.h"
  97 #include "gromacs/mdlib/update.h"
  98 #include "gromacs/mdlib/vcm.h"
  99 #include "gromacs/mdlib/vsite.h"
 100 #include "gromacs/mdrunutility/printtime.h"
 101 #include "gromacs/mdtypes/awh_history.h"
 102 #include "gromacs/mdtypes/awh_params.h"
 103 #include "gromacs/mdtypes/commrec.h"
 104 #include "gromacs/mdtypes/df_history.h"
 105 #include "gromacs/mdtypes/energyhistory.h"
 106 #include "gromacs/mdtypes/fcdata.h"
 107 #include "gromacs/mdtypes/forcerec.h"
 108 #include "gromacs/mdtypes/group.h"
 109 #include "gromacs/mdtypes/inputrec.h"
 110 #include "gromacs/mdtypes/interaction_const.h"
 111 #include "gromacs/mdtypes/md_enums.h"
 112 #include "gromacs/mdtypes/mdatom.h"
 113 #include "gromacs/mdtypes/mdrunoptions.h"
 114 #include "gromacs/mdtypes/observableshistory.h"
 115 #include "gromacs/mdtypes/pullhistory.h"
 116 #include "gromacs/mdtypes/state.h"
 117 #include "gromacs/nbnxm/nbnxm.h"
 118 #include "gromacs/pbcutil/mshift.h"
 119 #include "gromacs/pbcutil/pbc.h"
 120 #include "gromacs/pulling/output.h"
 121 #include "gromacs/pulling/pull.h"
 122 #include "gromacs/swap/swapcoords.h"
 123 #include "gromacs/timing/wallcycle.h"
 124 #include "gromacs/timing/walltime_accounting.h"
 125 #include "gromacs/topology/atoms.h"
 126 #include "gromacs/topology/idef.h"
 127 #include "gromacs/topology/mtop_util.h"
 128 #include "gromacs/topology/topology.h"
 129 #include "gromacs/trajectory/trajectoryframe.h"
 130 #include "gromacs/utility/basedefinitions.h"
 131 #include "gromacs/utility/cstringutil.h"
 132 #include "gromacs/utility/fatalerror.h"
 133 #include "gromacs/utility/logger.h"
 134 #include "gromacs/utility/real.h"
 135 #include "gromacs/utility/smalloc.h"
 136
 137 #include "integrator.h"
 138 #include "replicaexchange.h"
 139 #include "shellfc.h"
 140
 141 #if GMX_FAHCORE
 142 #include "corewrap.h"
 143 #endif
 144
 145 using gmx::SimulationSignaller;
 146
 147 void gmx::Integrator::do_md()
 148 {
 149     // TODO Historically, the EM and MD "integrators" used different
 150     // names for the t_inputrec *parameter, but these must have the
 151     // same name, now that it's a member of a struct. We use this ir
 152     // alias to avoid a large ripple of nearly useless changes.
 153     // t_inputrec is being replaced by IMdpOptionsProvider, so this
 154     // will go away eventually.
 155     t_inputrec             *ir   = inputrec;
 156     int64_t                 step, step_rel;
 157     double                  t = ir->init_t, t0 = ir->init_t, lam0[efptNR];
 158     gmx_bool                bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
 159     gmx_bool                bNS, bNStList, bStopCM,
 160                             bFirstStep, bInitStep, bLastStep = FALSE;
 161     gmx_bool                bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
 162     gmx_bool                do_ene, do_log, do_verbose;
 163     gmx_bool                bMasterState;
 164     int                     force_flags, cglo_flags;
 165     tensor                  force_vir = {{0}}, shake_vir = {{0}}, total_vir = {{0}},
 166                             tmp_vir   = {{0}}, pres = {{0}};
 167     int                     i, m;
 168     rvec                    mu_tot;
 169     matrix                  parrinellorahmanMu, M;
 170     gmx_repl_ex_t           repl_ex = nullptr;
 171     gmx_localtop_t          top;
 172     PaddedVector<gmx::RVec> f {};
 173     gmx_global_stat_t       gstat;
 174     t_graph                *graph = nullptr;
 175     gmx_shellfc_t          *shellfc;
 176     gmx_bool                bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
 177     gmx_bool                bTemp, bPres, bTrotter;
 178     real                    dvdl_constr;
 179     rvec                   *cbuf        = nullptr;
 180     int                     cbuf_nalloc = 0;
 181     matrix                  lastbox;
 182     int                     lamnew  = 0;
 183     /* for FEP */
 184     int                     nstfep = 0;
 185     double                  cycles;
 186     real                    saved_conserved_quantity = 0;
 187     real                    last_ekin                = 0;
 188     t_extmass               MassQ;
 189     char                    sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
 190
 191     /* PME load balancing data for GPU kernels */
 192     gmx_bool              bPMETune         = FALSE;
 193     gmx_bool              bPMETunePrinting = FALSE;
 194
 195     bool                  bInteractiveMDstep = false;
 196
 197     /* Domain decomposition could incorrectly miss a bonded
 198        interaction, but checking for that requires a global
 199        communication stage, which does not otherwise happen in DD
 200        code. So we do that alongside the first global energy reduction
 201        after a new DD is made. These variables handle whether the
 202        check happens, and the result it returns. */
 203     bool              shouldCheckNumberOfBondedInteractions = false;
 204     int               totalNumberOfBondedInteractions       = -1;
 205
 206     SimulationSignals signals;
 207     // Most global communnication stages don't propagate mdrun
 208     // signals, and will use this object to achieve that.
 209     SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
 210
 211     if (!mdrunOptions.writeConfout)
 212     {
 213         // This is on by default, and the main known use case for
 214         // turning it off is for convenience in benchmarking, which is
 215         // something that should not show up in the general user
 216         // interface.
 217         GMX_LOG(mdlog.info).asParagraph().
 218             appendText("The -noconfout functionality is deprecated, and may be removed in a future version.");
 219     }
 220
 221     /* md-vv uses averaged full step velocities for T-control
 222        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 223        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 224     bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
 225
 226     const bool bRerunMD      = false;
 227
 228     int        nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
 229     bGStatEveryStep = (nstglobalcomm == 1);
 230
 231     SimulationGroups                  *groups = &top_global->groups;
 232
 233     std::unique_ptr<EssentialDynamics> ed = nullptr;
 234     if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr)
 235     {
 236         /* Initialize essential dynamics sampling */
 237         ed = init_edsam(mdlog,
 238                         opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm),
 239                         top_global,
 240                         ir, cr, constr,
 241                         state_global, observablesHistory,
 242                         oenv, mdrunOptions.continuationOptions.appendFiles);
 243     }
 244
 245     initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
 246     Update upd(ir, deform);
 247     bool   doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
 248     if (!mdrunOptions.continuationOptions.appendFiles)
 249     {
 250         pleaseCiteCouplingAlgorithms(fplog, *ir);
 251     }
 252     init_nrnb(nrnb);
 253     gmx_mdoutf       *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, top_global, oenv, wcycle);
 254     gmx::EnergyOutput energyOutput;
 255     energyOutput.prepare(mdoutf_get_fp_ene(outf), top_global, ir, mdoutf_get_fp_dhdl(outf));
 256
 257     /* Kinetic energy data */
 258     std::unique_ptr<gmx_ekindata_t> eKinData = std::make_unique<gmx_ekindata_t>();
 259     gmx_ekindata_t                 *ekind    = eKinData.get();
 260     init_ekindata(fplog, top_global, &(ir->opts), ekind);
 261     /* Copy the cos acceleration to the groups struct */
 262     ekind->cosacc.cos_accel = ir->cos_accel;
 263
 264     gstat = global_stat_init(ir);
 265
 266     /* Check for polarizable models and flexible constraints */
 267     shellfc = init_shell_flexcon(fplog,
 268                                  top_global, constr ? constr->numFlexibleConstraints() : 0,
 269                                  ir->nstcalcenergy, DOMAINDECOMP(cr));
 270
 271     {
 272         double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
 273         if ((io > 2000) && MASTER(cr))
 274         {
 275             fprintf(stderr,
 276                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 277                     io);
 278         }
 279     }
 280
 281     // Local state only becomes valid now.
 282     std::unique_ptr<t_state> stateInstance;
 283     t_state *                state;
 284
 285     if (DOMAINDECOMP(cr))
 286     {
 287         dd_init_local_top(*top_global, &top);
 288
 289         stateInstance = std::make_unique<t_state>();
 290         state         = stateInstance.get();
 291         dd_init_local_state(cr->dd, state_global, state);
 292
 293         /* Distribute the charge groups over the nodes from the master node */
 294         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1,
 295                             state_global, *top_global, ir, imdSession,
 296                             state, &f, mdAtoms, &top, fr,
 297                             vsite, constr,
 298                             nrnb, nullptr, FALSE);
 299         shouldCheckNumberOfBondedInteractions = true;
 300         upd.setNumAtoms(state->natoms);
 301     }
 302     else
 303     {
 304         state_change_natoms(state_global, state_global->natoms);
 305         f.resizeWithPadding(state_global->natoms);
 306         /* Copy the pointer to the global state */
 307         state = state_global;
 308
 309         /* Generate and initialize new topology */
 310         mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr,
 311                                   &graph, mdAtoms, constr, vsite, shellfc);
 312
 313         upd.setNumAtoms(state->natoms);
 314     }
 315
 316     auto mdatoms = mdAtoms->mdatoms();
 317
 318     // NOTE: The global state is no longer used at this point.
 319     // But state_global is still used as temporary storage space for writing
 320     // the global state to file and potentially for replica exchange.
 321     // (Global topology should persist.)
 322
 323     update_mdatoms(mdatoms, state->lambda[efptMASS]);
 324
 325     const ContinuationOptions &continuationOptions    = mdrunOptions.continuationOptions;
 326     bool                       startingFromCheckpoint = continuationOptions.startedFromCheckpoint;
 327
 328     if (ir->bExpanded)
 329     {
 330         /* Check nstexpanded here, because the grompp check was broken */
 331         if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
 332         {
 333             gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
 334         }
 335         init_expanded_ensemble(startingFromCheckpoint, ir, state->dfhist);
 336     }
 337
 338     if (MASTER(cr))
 339     {
 340         if (startingFromCheckpoint)
 341         {
 342             /* Restore from energy history if appending to output files */
 343             if (continuationOptions.appendFiles)
 344             {
 345                 /* If no history is available (because a checkpoint is from before
 346                  * it was written) make a new one later, otherwise restore it.
 347                  */
 348                 if (observablesHistory->energyHistory)
 349                 {
 350                     energyOutput.restoreFromEnergyHistory(*observablesHistory->energyHistory);
 351                 }
 352             }
 353             else if (observablesHistory->energyHistory)
 354             {
 355                 /* We might have read an energy history from checkpoint.
 356                  * As we are not appending, we want to restart the statistics.
 357                  * Free the allocated memory and reset the counts.
 358                  */
 359                 observablesHistory->energyHistory = {};
 360                 /* We might have read a pull history from checkpoint.
 361                  * We will still want to keep the statistics, so that the files
 362                  * can be joined and still be meaningful.
 363                  * This means that observablesHistory->pullHistory
 364                  * should not be reset.
 365                  */
 366             }
 367         }
 368         if (!observablesHistory->energyHistory)
 369         {
 370             observablesHistory->energyHistory = std::make_unique<energyhistory_t>();
 371         }
 372         if (!observablesHistory->pullHistory)
 373         {
 374             observablesHistory->pullHistory = std::make_unique<PullHistory>();
 375         }
 376         /* Set the initial energy history */
 377         energyOutput.fillEnergyHistory(observablesHistory->energyHistory.get());
 378     }
 379
 380     preparePrevStepPullCom(ir, mdatoms, state, state_global, cr, startingFromCheckpoint);
 381
 382     // TODO: Remove this by converting AWH into a ForceProvider
 383     auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingFromCheckpoint,
 384                                 shellfc != nullptr,
 385                                 opt2fn("-awh", nfile, fnm), ir->pull_work);
 386
 387     const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
 388     if (useReplicaExchange && MASTER(cr))
 389     {
 390         repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir,
 391                                         replExParams);
 392     }
 393     /* PME tuning is only supported in the Verlet scheme, with PME for
 394      * Coulomb. It is not supported with only LJ PME. */
 395     bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) &&
 396                 !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP);
 397
 398     pme_load_balancing_t *pme_loadbal      = nullptr;
 399     if (bPMETune)
 400     {
 401         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
 402                          *fr->ic, *fr->nbv, fr->pmedata, fr->nbv->useGpu(),
 403                          &bPMETunePrinting);
 404     }
 405
 406     if (!ir->bContinuation)
 407     {
 408         if (state->flags & (1 << estV))
 409         {
 410             auto v = makeArrayRef(state->v);
 411             /* Set the velocities of vsites, shells and frozen atoms to zero */
 412             for (i = 0; i < mdatoms->homenr; i++)
 413             {
 414                 if (mdatoms->ptype[i] == eptVSite ||
 415                     mdatoms->ptype[i] == eptShell)
 416                 {
 417                     clear_rvec(v[i]);
 418                 }
 419                 else if (mdatoms->cFREEZE)
 420                 {
 421                     for (m = 0; m < DIM; m++)
 422                     {
 423                         if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 424                         {
 425                             v[i][m] = 0;
 426                         }
 427                     }
 428                 }
 429             }
 430         }
 431
 432         if (constr)
 433         {
 434             /* Constrain the initial coordinates and velocities */
 435             do_constrain_first(fplog, constr, ir, mdatoms, state);
 436         }
 437         if (vsite)
 438         {
 439             /* Construct the virtual sites for the initial configuration */
 440             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr,
 441                              top.idef.iparams, top.idef.il,
 442                              fr->ePBC, fr->bMolPBC, cr, state->box);
 443         }
 444     }
 445
 446     if (ir->efep != efepNO)
 447     {
 448         /* Set free energy calculation frequency as the greatest common
 449          * denominator of nstdhdl and repl_ex_nst. */
 450         nstfep = ir->fepvals->nstdhdl;
 451         if (ir->bExpanded)
 452         {
 453             nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
 454         }
 455         if (useReplicaExchange)
 456         {
 457             nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
 458         }
 459     }
 460
 461     /* Be REALLY careful about what flags you set here. You CANNOT assume
 462      * this is the first step, since we might be restarting from a checkpoint,
 463      * and in that case we should not do any modifications to the state.
 464      */
 465     bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
 466
 467     if (continuationOptions.haveReadEkin)
 468     {
 469         restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
 470     }
 471
 472     cglo_flags = (CGLO_INITIALIZATION | CGLO_TEMPERATURE | CGLO_GSTAT
 473                   | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
 474                   | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
 475                   | (continuationOptions.haveReadEkin ? CGLO_READEKIN : 0));
 476
 477     bSumEkinhOld = FALSE;
 478
 479     t_vcm vcm(top_global->groups, *ir);
 480     reportComRemovalInfo(fplog, vcm);
 481
 482     /* To minimize communication, compute_globals computes the COM velocity
 483      * and the kinetic energy for the velocities without COM motion removed.
 484      * Thus to get the kinetic energy without the COM contribution, we need
 485      * to call compute_globals twice.
 486      */
 487     for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
 488     {
 489         int cglo_flags_iteration = cglo_flags;
 490         if (bStopCM && cgloIteration == 0)
 491         {
 492             cglo_flags_iteration |= CGLO_STOPCM;
 493             cglo_flags_iteration &= ~CGLO_TEMPERATURE;
 494         }
 495         compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
 496                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 497                         constr, &nullSignaller, state->box,
 498                         &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration
 499                         | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
 500     }
 501     checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
 502                                     top_global, &top, state,
 503                                     &shouldCheckNumberOfBondedInteractions);
 504     if (ir->eI == eiVVAK)
 505     {
 506         /* a second call to get the half step temperature initialized as well */
 507         /* we do the same call as above, but turn the pressure off -- internally to
 508            compute_globals, this is recognized as a velocity verlet half-step
 509            kinetic energy calculation.  This minimized excess variables, but
 510            perhaps loses some logic?*/
 511
 512         compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
 513                         nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 514                         constr, &nullSignaller, state->box,
 515                         nullptr, &bSumEkinhOld,
 516                         cglo_flags & ~CGLO_PRESSURE);
 517     }
 518
 519     /* Calculate the initial half step temperature, and save the ekinh_old */
 520     if (!continuationOptions.startedFromCheckpoint)
 521     {
 522         for (i = 0; (i < ir->opts.ngtc); i++)
 523         {
 524             copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
 525         }
 526     }
 527
 528     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
 529        temperature control */
 530     auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
 531
 532     if (MASTER(cr))
 533     {
 534         if (!ir->bContinuation)
 535         {
 536             if (constr && ir->eConstrAlg == econtLINCS)
 537             {
 538                 fprintf(fplog,
 539                         "RMS relative constraint deviation after constraining: %.2e\n",
 540                         constr->rmsd());
 541             }
 542             if (EI_STATE_VELOCITY(ir->eI))
 543             {
 544                 real temp = enerd->term[F_TEMP];
 545                 if (ir->eI != eiVV)
 546                 {
 547                     /* Result of Ekin averaged over velocities of -half
 548                      * and +half step, while we only have -half step here.
 549                      */
 550                     temp *= 2;
 551                 }
 552                 fprintf(fplog, "Initial temperature: %g K\n", temp);
 553             }
 554         }
 555
 556         char tbuf[20];
 557         fprintf(stderr, "starting mdrun '%s'\n",
 558                 *(top_global->name));
 559         if (ir->nsteps >= 0)
 560         {
 561             sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
 562         }
 563         else
 564         {
 565             sprintf(tbuf, "%s", "infinite");
 566         }
 567         if (ir->init_step > 0)
 568         {
 569             fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 570                     gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
 571                     gmx_step_str(ir->init_step, sbuf2),
 572                     ir->init_step*ir->delta_t);
 573         }
 574         else
 575         {
 576             fprintf(stderr, "%s steps, %s ps.\n",
 577                     gmx_step_str(ir->nsteps, sbuf), tbuf);
 578         }
 579         fprintf(fplog, "\n");
 580     }
 581
 582     walltime_accounting_start_time(walltime_accounting);
 583     wallcycle_start(wcycle, ewcRUN);
 584     print_start(fplog, cr, walltime_accounting, "mdrun");
 585
 586 #if GMX_FAHCORE
 587     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 588     int chkpt_ret = fcCheckPointParallel( cr->nodeid,
 589                                           NULL, 0);
 590     if (chkpt_ret == 0)
 591     {
 592         gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
 593     }
 594 #endif
 595
 596     /***********************************************************
 597      *
 598      *             Loop over MD steps
 599      *
 600      ************************************************************/
 601
 602     bFirstStep       = TRUE;
 603     /* Skip the first Nose-Hoover integration when we get the state from tpx */
 604     bInitStep        = !startingFromCheckpoint || EI_VV(ir->eI);
 605     bSumEkinhOld     = FALSE;
 606     bExchanged       = FALSE;
 607     bNeedRepartition = FALSE;
 608
 609     bool simulationsShareState = false;
 610     int  nstSignalComm         = nstglobalcomm;
 611     {
 612         // TODO This implementation of ensemble orientation restraints is nasty because
 613         // a user can't just do multi-sim with single-sim orientation restraints.
 614         bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
 615         bool awhUsesMultiSim         = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
 616
 617         // Replica exchange, ensemble restraints and AWH need all
 618         // simulations to remain synchronized, so they need
 619         // checkpoints and stop conditions to act on the same step, so
 620         // the propagation of such signals must take place between
 621         // simulations, not just within simulations.
 622         // TODO: Make algorithm initializers set these flags.
 623         simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
 624
 625         if (simulationsShareState)
 626         {
 627             // Inter-simulation signal communication does not need to happen
 628             // often, so we use a minimum of 200 steps to reduce overhead.
 629             const int c_minimumInterSimulationSignallingInterval = 200;
 630             nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm;
 631         }
 632     }
 633
 634     auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
 635                 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
 636                 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
 637                 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
 638
 639     auto checkpointHandler = std::make_unique<CheckpointHandler>(
 640                 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]),
 641                 simulationsShareState, ir->nstlist == 0, MASTER(cr),
 642                 mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period);
 643
 644     const bool resetCountersIsLocal = true;
 645     auto       resetHandler         = std::make_unique<ResetHandler>(
 646                 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal,
 647                 ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
 648                 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
 649
 650     const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
 651
 652     step     = ir->init_step;
 653     step_rel = 0;
 654
 655     // TODO extract this to new multi-simulation module
 656     if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
 657     {
 658         if (!multisim_int_all_are_equal(ms, ir->nsteps))
 659         {
 660             GMX_LOG(mdlog.warning).appendText(
 661                     "Note: The number of steps is not consistent across multi simulations,\n"
 662                     "but we are proceeding anyway!");
 663         }
 664         if (!multisim_int_all_are_equal(ms, ir->init_step))
 665         {
 666             GMX_LOG(mdlog.warning).appendText(
 667                     "Note: The initial step is not consistent across multi simulations,\n"
 668                     "but we are proceeding anyway!");
 669         }
 670     }
 671
 672     /* and stop now if we should */
 673     bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
 674     while (!bLastStep)
 675     {
 676
 677         /* Determine if this is a neighbor search step */
 678         bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 679
 680         if (bPMETune && bNStList)
 681         {
 682             /* PME grid + cut-off optimization with GPUs or PME nodes */
 683             pme_loadbal_do(pme_loadbal, cr,
 684                            (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
 685                            fplog, mdlog,
 686                            *ir, fr, *state,
 687                            wcycle,
 688                            step, step_rel,
 689                            &bPMETunePrinting);
 690         }
 691
 692         wallcycle_start(wcycle, ewcSTEP);
 693
 694         bLastStep = (step_rel == ir->nsteps);
 695         t         = t0 + step*ir->delta_t;
 696
 697         // TODO Refactor this, so that nstfep does not need a default value of zero
 698         if (ir->efep != efepNO || ir->bSimTemp)
 699         {
 700             /* find and set the current lambdas */
 701             setCurrentLambdasLocal(step, ir->fepvals, lam0, state);
 702
 703             bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
 704             bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
 705             bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
 706                             && (ir->bExpanded) && (step > 0) && (!startingFromCheckpoint));
 707         }
 708
 709         bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep &&
 710                      do_per_step(step, replExParams.exchangeInterval));
 711
 712         if (doSimulatedAnnealing)
 713         {
 714             update_annealing_target_temp(ir, t, &upd);
 715         }
 716
 717         /* Stop Center of Mass motion */
 718         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
 719
 720         /* Determine whether or not to do Neighbour Searching */
 721         bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
 722
 723         bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
 724
 725         /* do_log triggers energy and virial calculation. Because this leads
 726          * to different code paths, forces can be different. Thus for exact
 727          * continuation we should avoid extra log output.
 728          * Note that the || bLastStep can result in non-exact continuation
 729          * beyond the last step. But we don't consider that to be an issue.
 730          */
 731         do_log     = do_per_step(step, ir->nstlog) || (bFirstStep && !startingFromCheckpoint) || bLastStep;
 732         do_verbose = mdrunOptions.verbose &&
 733             (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
 734
 735         if (bNS && !(bFirstStep && ir->bContinuation))
 736         {
 737             bMasterState = FALSE;
 738             /* Correct the new box if it is too skewed */
 739             if (inputrecDynamicBox(ir))
 740             {
 741                 if (correct_box(fplog, step, state->box, graph))
 742                 {
 743                     bMasterState = TRUE;
 744                 }
 745             }
 746             if (DOMAINDECOMP(cr) && bMasterState)
 747             {
 748                 dd_collect_state(cr->dd, state, state_global);
 749             }
 750
 751             if (DOMAINDECOMP(cr))
 752             {
 753                 /* Repartition the domain decomposition */
 754                 dd_partition_system(fplog, mdlog, step, cr,
 755                                     bMasterState, nstglobalcomm,
 756                                     state_global, *top_global, ir, imdSession,
 757                                     state, &f, mdAtoms, &top, fr,
 758                                     vsite, constr,
 759                                     nrnb, wcycle,
 760                                     do_verbose && !bPMETunePrinting);
 761                 shouldCheckNumberOfBondedInteractions = true;
 762                 upd.setNumAtoms(state->natoms);
 763             }
 764         }
 765
 766         if (MASTER(cr) && do_log)
 767         {
 768             print_ebin_header(fplog, step, t); /* can we improve the information printed here? */
 769         }
 770
 771         if (ir->efep != efepNO)
 772         {
 773             update_mdatoms(mdatoms, state->lambda[efptMASS]);
 774         }
 775
 776         if (bExchanged)
 777         {
 778
 779             /* We need the kinetic energy at minus the half step for determining
 780              * the full step kinetic energy and possibly for T-coupling.*/
 781             /* This may not be quite working correctly yet . . . . */
 782             compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
 783                             wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
 784                             constr, &nullSignaller, state->box,
 785                             &totalNumberOfBondedInteractions, &bSumEkinhOld,
 786                             CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
 787             checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
 788                                             top_global, &top, state,
 789                                             &shouldCheckNumberOfBondedInteractions);
 790         }
 791         clear_mat(force_vir);
 792
 793         checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
 794
 795         /* Determine the energy and pressure:
 796          * at nstcalcenergy steps and at energy output steps (set below).
 797          */
 798         if (EI_VV(ir->eI) && (!bInitStep))
 799         {
 800             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
 801             bCalcVir      = bCalcEnerStep ||
 802                 (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
 803         }
 804         else
 805         {
 806             bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
 807             bCalcVir      = bCalcEnerStep ||
 808                 (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
 809         }
 810         bCalcEner = bCalcEnerStep;
 811
 812         do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
 813
 814         if (do_ene || do_log || bDoReplEx)
 815         {
 816             bCalcVir  = TRUE;
 817             bCalcEner = TRUE;
 818         }
 819
 820         /* Do we need global communication ? */
 821         bGStat = (bCalcVir || bCalcEner || bStopCM ||
 822                   do_per_step(step, nstglobalcomm) ||
 823                   (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
 824
 825         force_flags = (GMX_FORCE_STATECHANGED |
 826                        ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) |
 827                        GMX_FORCE_ALLFORCES |
 828                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
 829                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
 830                        (bDoFEP ? GMX_FORCE_DHDL : 0)
 831                        );
 832
 833         if (shellfc)
 834         {
 835             /* Now is the time to relax the shells */
 836             relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose,
 837                                 enforcedRotation, step,
 838                                 ir, imdSession, bNS, force_flags, &top,
 839                                 constr, enerd, fcd,
 840                                 state, f.arrayRefWithPadding(), force_vir, mdatoms,
 841                                 nrnb, wcycle, graph,
 842                                 shellfc, fr, ppForceWorkload, t, mu_tot,
 843                                 vsite,
 844                                 ddBalanceRegionHandler);
 845         }
 846         else
 847         {
 848             /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated
 849                (or the AWH update will be performed twice for one step when continuing). It would be best to
 850                call this update function from do_md_trajectory_writing but that would occur after do_force.
 851                One would have to divide the update_awh function into one function applying the AWH force
 852                and one doing the AWH bias update. The update AWH bias function could then be called after
 853                do_md_trajectory_writing (then containing update_awh_history).
 854                The checkpointing will in the future probably moved to the start of the md loop which will
 855                rid of this issue. */
 856             if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
 857             {
 858                 awh->updateHistory(state_global->awhHistory.get());
 859             }
 860
 861             /* The coordinates (x) are shifted (to get whole molecules)
 862              * in do_force.
 863              * This is parallellized as well, and does communication too.
 864              * Check comments in sim_util.c
 865              */
 866             do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession,
 867                      step, nrnb, wcycle, &top,
 868                      state->box, state->x.arrayRefWithPadding(), &state->hist,
 869                      f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd,
 870                      state->lambda, graph,
 871                      fr, ppForceWorkload, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
 872                      (bNS ? GMX_FORCE_NS : 0) | force_flags,
 873                      ddBalanceRegionHandler);
 874         }
 875
 876         if (EI_VV(ir->eI) && !startingFromCheckpoint)
 877         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 878         {
 879             rvec *vbuf = nullptr;
 880
 881             wallcycle_start(wcycle, ewcUPDATE);
 882             if (ir->eI == eiVV && bInitStep)
 883             {
 884                 /* if using velocity verlet with full time step Ekin,
 885                  * take the first half step only to compute the
 886                  * virial for the first step. From there,
 887                  * revert back to the initial coordinates
 888                  * so that the input is actually the initial step.
 889                  */
 890                 snew(vbuf, state->natoms);
 891                 copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */
 892             }
 893             else
 894             {
 895                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
 896                 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
 897             }
 898
 899             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
 900                           ekind, M, &upd, etrtVELOCITY1,
 901                           cr, constr);
 902
 903             wallcycle_stop(wcycle, ewcUPDATE);
 904             constrain_velocities(step, nullptr,
 905                                  state,
 906                                  shake_vir,
 907                                  constr,
 908                                  bCalcVir, do_log, do_ene);
 909             wallcycle_start(wcycle, ewcUPDATE);
 910             /* if VV, compute the pressure and constraints */
 911             /* For VV2, we strictly only need this if using pressure
 912              * control, but we really would like to have accurate pressures
 913              * printed out.
 914              * Think about ways around this in the future?
 915              * For now, keep this choice in comments.
 916              */
 917             /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
 918             /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
 919             bPres = TRUE;
 920             bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
 921             if (bCalcEner && ir->eI == eiVVAK)
 922             {
 923                 bSumEkinhOld = TRUE;
 924             }
 925             /* for vv, the first half of the integration actually corresponds to the previous step.
 926                So we need information from the last step in the first half of the integration */
 927             if (bGStat || do_per_step(step-1, nstglobalcomm))
 928             {
 929                 wallcycle_stop(wcycle, ewcUPDATE);
 930                 compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
 931                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 932                                 constr, &nullSignaller, state->box,
 933                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
 934                                 (bGStat ? CGLO_GSTAT : 0)
 935                                 | (bCalcEner ? CGLO_ENERGY : 0)
 936                                 | (bTemp ? CGLO_TEMPERATURE : 0)
 937                                 | (bPres ? CGLO_PRESSURE : 0)
 938                                 | (bPres ? CGLO_CONSTRAINT : 0)
 939                                 | (bStopCM ? CGLO_STOPCM : 0)
 940                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
 941                                 | CGLO_SCALEEKIN
 942                                 );
 943                 /* explanation of above:
 944                    a) We compute Ekin at the full time step
 945                    if 1) we are using the AveVel Ekin, and it's not the
 946                    initial step, or 2) if we are using AveEkin, but need the full
 947                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 948                    b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
 949                    EkinAveVel because it's needed for the pressure */
 950                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
 951                                                 top_global, &top, state,
 952                                                 &shouldCheckNumberOfBondedInteractions);
 953                 wallcycle_start(wcycle, ewcUPDATE);
 954             }
 955             /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 956             if (!bInitStep)
 957             {
 958                 if (bTrotter)
 959                 {
 960                     m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
 961                     trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
 962
 963                     /* TODO This is only needed when we're about to write
 964                      * a checkpoint, because we use it after the restart
 965                      * (in a kludge?). But what should we be doing if
 966                      * startingFromCheckpoint or bInitStep are true? */
 967                     if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
 968                     {
 969                         copy_mat(shake_vir, state->svir_prev);
 970                         copy_mat(force_vir, state->fvir_prev);
 971                     }
 972                     if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
 973                     {
 974                         /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 975                         enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
 976                         enerd->term[F_EKIN] = trace(ekind->ekin);
 977                     }
 978                 }
 979                 else if (bExchanged)
 980                 {
 981                     wallcycle_stop(wcycle, ewcUPDATE);
 982                     /* We need the kinetic energy at minus the half step for determining
 983                      * the full step kinetic energy and possibly for T-coupling.*/
 984                     /* This may not be quite working correctly yet . . . . */
 985                     compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
 986                                     wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot,
 987                                     constr, &nullSignaller, state->box,
 988                                     nullptr, &bSumEkinhOld,
 989                                     CGLO_GSTAT | CGLO_TEMPERATURE);
 990                     wallcycle_start(wcycle, ewcUPDATE);
 991                 }
 992             }
 993             /* if it's the initial step, we performed this first step just to get the constraint virial */
 994             if (ir->eI == eiVV && bInitStep)
 995             {
 996                 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
 997                 sfree(vbuf);
 998             }
 999             wallcycle_stop(wcycle, ewcUPDATE);
1000         }
1001
1002         /* compute the conserved quantity */
1003         if (EI_VV(ir->eI))
1004         {
1005             saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1006             if (ir->eI == eiVV)
1007             {
1008                 last_ekin = enerd->term[F_EKIN];
1009             }
1010             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1011             {
1012                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1013             }
1014             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1015             if (ir->efep != efepNO)
1016             {
1017                 sum_dhdl(enerd, state->lambda, ir->fepvals);
1018             }
1019         }
1020
1021         /* ########  END FIRST UPDATE STEP  ############## */
1022         /* ########  If doing VV, we now have v(dt) ###### */
1023         if (bDoExpanded)
1024         {
1025             /* perform extended ensemble sampling in lambda - we don't
1026                actually move to the new state before outputting
1027                statistics, but if performing simulated tempering, we
1028                do update the velocities and the tau_t. */
1029
1030             lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms);
1031             /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1032             if (MASTER(cr))
1033             {
1034                 copy_df_history(state_global->dfhist, state->dfhist);
1035             }
1036         }
1037
1038         /* Now we have the energies and forces corresponding to the
1039          * coordinates at time t. We must output all of this before
1040          * the update.
1041          */
1042         do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
1043                                  ir, state, state_global, observablesHistory,
1044                                  top_global, fr,
1045                                  outf, energyOutput, ekind, f,
1046                                  checkpointHandler->isCheckpointingStep(),
1047                                  bRerunMD, bLastStep,
1048                                  mdrunOptions.writeConfout,
1049                                  bSumEkinhOld);
1050         /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1051         bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1052
1053         /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1054         if (startingFromCheckpoint && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1055         {
1056             copy_mat(state->svir_prev, shake_vir);
1057             copy_mat(state->fvir_prev, force_vir);
1058         }
1059
1060         stopHandler->setSignal();
1061         resetHandler->setSignal(walltime_accounting);
1062
1063         if (bGStat || !PAR(cr))
1064         {
1065             /* In parallel we only have to check for checkpointing in steps
1066              * where we do global communication,
1067              *  otherwise the other nodes don't know.
1068              */
1069             checkpointHandler->setSignal(walltime_accounting);
1070         }
1071
1072         /* #########   START SECOND UPDATE STEP ################# */
1073
1074         /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
1075            in preprocessing */
1076
1077         if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1078         {
1079             gmx_bool bIfRandomize;
1080             bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1081             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1082             if (constr && bIfRandomize)
1083             {
1084                 constrain_velocities(step, nullptr,
1085                                      state,
1086                                      tmp_vir,
1087                                      constr,
1088                                      bCalcVir, do_log, do_ene);
1089             }
1090         }
1091         /* Box is changed in update() when we do pressure coupling,
1092          * but we should still use the old box for energy corrections and when
1093          * writing it to the energy file, so it matches the trajectory files for
1094          * the same timestep above. Make a copy in a separate array.
1095          */
1096         copy_mat(state->box, lastbox);
1097
1098         dvdl_constr = 0;
1099
1100         wallcycle_start(wcycle, ewcUPDATE);
1101         /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1102         if (bTrotter)
1103         {
1104             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1105             /* We can only do Berendsen coupling after we have summed
1106              * the kinetic energy or virial. Since the happens
1107              * in global_state after update, we should only do it at
1108              * step % nstlist = 1 with bGStatEveryStep=FALSE.
1109              */
1110         }
1111         else
1112         {
1113             update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1114             update_pcouple_before_coordinates(fplog, step, ir, state,
1115                                               parrinellorahmanMu, M,
1116                                               bInitStep);
1117         }
1118
1119         if (EI_VV(ir->eI))
1120         {
1121             /* velocity half-step update */
1122             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1123                           ekind, M, &upd, etrtVELOCITY2,
1124                           cr, constr);
1125         }
1126
1127         /* Above, initialize just copies ekinh into ekin,
1128          * it doesn't copy position (for VV),
1129          * and entire integrator for MD.
1130          */
1131
1132         if (ir->eI == eiVVAK)
1133         {
1134             /* We probably only need md->homenr, not state->natoms */
1135             if (state->natoms > cbuf_nalloc)
1136             {
1137                 cbuf_nalloc = state->natoms;
1138                 srenew(cbuf, cbuf_nalloc);
1139             }
1140             copy_rvecn(as_rvec_array(state->x.data()), cbuf, 0, state->natoms);
1141         }
1142
1143         update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1144                       ekind, M, &upd, etrtPOSITION, cr, constr);
1145         wallcycle_stop(wcycle, ewcUPDATE);
1146
1147         constrain_coordinates(step, &dvdl_constr, state,
1148                               shake_vir,
1149                               &upd, constr,
1150                               bCalcVir, do_log, do_ene);
1151         update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
1152                               cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
1153         finish_update(ir, mdatoms,
1154                       state, graph,
1155                       nrnb, wcycle, &upd, constr);
1156
1157         if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1158         {
1159             updatePrevStepPullCom(ir->pull_work, state);
1160         }
1161
1162         if (ir->eI == eiVVAK)
1163         {
1164             /* erase F_EKIN and F_TEMP here? */
1165             /* just compute the kinetic energy at the half step to perform a trotter step */
1166             compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
1167                             wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1168                             constr, &nullSignaller, lastbox,
1169                             nullptr, &bSumEkinhOld,
1170                             (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
1171                             );
1172             wallcycle_start(wcycle, ewcUPDATE);
1173             trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1174             /* now we know the scaling, we can compute the positions again again */
1175             copy_rvecn(cbuf, as_rvec_array(state->x.data()), 0, state->natoms);
1176
1177             update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
1178                           ekind, M, &upd, etrtPOSITION, cr, constr);
1179             wallcycle_stop(wcycle, ewcUPDATE);
1180
1181             /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1182             /* are the small terms in the shake_vir here due
1183              * to numerical errors, or are they important
1184              * physically? I'm thinking they are just errors, but not completely sure.
1185              * For now, will call without actually constraining, constr=NULL*/
1186             finish_update(ir, mdatoms,
1187                           state, graph,
1188                           nrnb, wcycle, &upd, nullptr);
1189         }
1190         if (EI_VV(ir->eI))
1191         {
1192             /* this factor or 2 correction is necessary
1193                because half of the constraint force is removed
1194                in the vv step, so we have to double it.  See
1195                the Redmine issue #1255.  It is not yet clear
1196                if the factor of 2 is exact, or just a very
1197                good approximation, and this will be
1198                investigated.  The next step is to see if this
1199                can be done adding a dhdl contribution from the
1200                rattle step, but this is somewhat more
1201                complicated with the current code. Will be
1202                investigated, hopefully for 4.6.3. However,
1203                this current solution is much better than
1204                having it completely wrong.
1205              */
1206             enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
1207         }
1208         else
1209         {
1210             enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1211         }
1212
1213         if (vsite != nullptr)
1214         {
1215             wallcycle_start(wcycle, ewcVSITECONSTR);
1216             if (graph != nullptr)
1217             {
1218                 shift_self(graph, state->box, state->x.rvec_array());
1219             }
1220             construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
1221                              top.idef.iparams, top.idef.il,
1222                              fr->ePBC, fr->bMolPBC, cr, state->box);
1223
1224             if (graph != nullptr)
1225             {
1226                 unshift_self(graph, state->box, state->x.rvec_array());
1227             }
1228             wallcycle_stop(wcycle, ewcVSITECONSTR);
1229         }
1230
1231         /* ############## IF NOT VV, Calculate globals HERE  ############ */
1232         /* With Leap-Frog we can skip compute_globals at
1233          * non-communication steps, but we need to calculate
1234          * the kinetic energy one step before communication.
1235          */
1236         {
1237             // Organize to do inter-simulation signalling on steps if
1238             // and when algorithms require it.
1239             bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1240
1241             if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
1242             {
1243                 // Since we're already communicating at this step, we
1244                 // can propagate intra-simulation signals. Note that
1245                 // check_nstglobalcomm has the responsibility for
1246                 // choosing the value of nstglobalcomm that is one way
1247                 // bGStat becomes true, so we can't get into a
1248                 // situation where e.g. checkpointing can't be
1249                 // signalled.
1250                 bool                doIntraSimSignal = true;
1251                 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1252
1253                 compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, &vcm,
1254                                 wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
1255                                 constr, &signaller,
1256                                 lastbox,
1257                                 &totalNumberOfBondedInteractions, &bSumEkinhOld,
1258                                 (bGStat ? CGLO_GSTAT : 0)
1259                                 | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1260                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1261                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1262                                 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
1263                                 | CGLO_CONSTRAINT
1264                                 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
1265                                 );
1266                 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1267                                                 top_global, &top, state,
1268                                                 &shouldCheckNumberOfBondedInteractions);
1269             }
1270         }
1271
1272         /* #############  END CALC EKIN AND PRESSURE ################# */
1273
1274         /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1275            the virial that should probably be addressed eventually. state->veta has better properies,
1276            but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1277            generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1278
1279         if (ir->efep != efepNO && !EI_VV(ir->eI))
1280         {
1281             /* Sum up the foreign energy and dhdl terms for md and sd.
1282                Currently done every step so that dhdl is correct in the .edr */
1283             sum_dhdl(enerd, state->lambda, ir->fepvals);
1284         }
1285
1286         update_pcouple_after_coordinates(fplog, step, ir, mdatoms,
1287                                          pres, force_vir, shake_vir,
1288                                          parrinellorahmanMu,
1289                                          state, nrnb, &upd);
1290
1291         /* ################# END UPDATE STEP 2 ################# */
1292         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1293
1294         /* The coordinates (x) were unshifted in update */
1295         if (!bGStat)
1296         {
1297             /* We will not sum ekinh_old,
1298              * so signal that we still have to do it.
1299              */
1300             bSumEkinhOld = TRUE;
1301         }
1302
1303         if (bCalcEner)
1304         {
1305             /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1306
1307             /* use the directly determined last velocity, not actually the averaged half steps */
1308             if (bTrotter && ir->eI == eiVV)
1309             {
1310                 enerd->term[F_EKIN] = last_ekin;
1311             }
1312             enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1313
1314             if (integratorHasConservedEnergyQuantity(ir))
1315             {
1316                 if (EI_VV(ir->eI))
1317                 {
1318                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1319                 }
1320                 else
1321                 {
1322                     enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1323                 }
1324             }
1325             /* #########  END PREPARING EDR OUTPUT  ###########  */
1326         }
1327
1328         /* Output stuff */
1329         if (MASTER(cr))
1330         {
1331             if (fplog && do_log && bDoExpanded)
1332             {
1333                 /* only needed if doing expanded ensemble */
1334                 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr,
1335                                           state_global->dfhist, state->fep_state, ir->nstlog, step);
1336             }
1337             if (bCalcEner)
1338             {
1339                 energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep,
1340                                                  t, mdatoms->tmass, enerd, state,
1341                                                  ir->fepvals, ir->expandedvals, lastbox,
1342                                                  shake_vir, force_vir, total_vir, pres,
1343                                                  ekind, mu_tot, constr);
1344             }
1345             else
1346             {
1347                 energyOutput.recordNonEnergyStep();
1348             }
1349
1350             gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
1351             gmx_bool do_or  = do_per_step(step, ir->nstorireout);
1352
1353             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1354                                                do_log ? fplog : nullptr,
1355                                                step, t,
1356                                                eprNORMAL, fcd, groups, &(ir->opts), awh.get());
1357
1358             if (ir->bPull)
1359             {
1360                 pull_print_output(ir->pull_work, step, t);
1361             }
1362
1363             if (do_per_step(step, ir->nstlog))
1364             {
1365                 if (fflush(fplog) != 0)
1366                 {
1367                     gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1368                 }
1369             }
1370         }
1371         if (bDoExpanded)
1372         {
1373             /* Have to do this part _after_ outputting the logfile and the edr file */
1374             /* Gets written into the state at the beginning of next loop*/
1375             state->fep_state = lamnew;
1376         }
1377         /* Print the remaining wall clock time for the run */
1378         if (isMasterSimMasterRank(ms, cr) &&
1379             (do_verbose || gmx_got_usr_signal()) &&
1380             !bPMETunePrinting)
1381         {
1382             if (shellfc)
1383             {
1384                 fprintf(stderr, "\n");
1385             }
1386             print_time(stderr, walltime_accounting, step, ir, cr);
1387         }
1388
1389         /* Ion/water position swapping.
1390          * Not done in last step since trajectory writing happens before this call
1391          * in the MD loop and exchanges would be lost anyway. */
1392         bNeedRepartition = FALSE;
1393         if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
1394             do_per_step(step, ir->swap->nstswap))
1395         {
1396             bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle,
1397                                              as_rvec_array(state->x.data()),
1398                                              state->box,
1399                                              MASTER(cr) && mdrunOptions.verbose,
1400                                              bRerunMD);
1401
1402             if (bNeedRepartition && DOMAINDECOMP(cr))
1403             {
1404                 dd_collect_state(cr->dd, state, state_global);
1405             }
1406         }
1407
1408         /* Replica exchange */
1409         bExchanged = FALSE;
1410         if (bDoReplEx)
1411         {
1412             bExchanged = replica_exchange(fplog, cr, ms, repl_ex,
1413                                           state_global, enerd,
1414                                           state, step, t);
1415         }
1416
1417         if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
1418         {
1419             dd_partition_system(fplog, mdlog, step, cr, TRUE, 1,
1420                                 state_global, *top_global, ir, imdSession,
1421                                 state, &f, mdAtoms, &top, fr,
1422                                 vsite, constr,
1423                                 nrnb, wcycle, FALSE);
1424             shouldCheckNumberOfBondedInteractions = true;
1425             upd.setNumAtoms(state->natoms);
1426         }
1427
1428         bFirstStep             = FALSE;
1429         bInitStep              = FALSE;
1430         startingFromCheckpoint = false;
1431
1432         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1433         /* With all integrators, except VV, we need to retain the pressure
1434          * at the current step for coupling at the next step.
1435          */
1436         if ((state->flags & (1<<estPRES_PREV)) &&
1437             (bGStatEveryStep ||
1438              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1439         {
1440             /* Store the pressure in t_state for pressure coupling
1441              * at the next MD step.
1442              */
1443             copy_mat(pres, state->pres_prev);
1444         }
1445
1446         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1447
1448         if ( (membed != nullptr) && (!bLastStep) )
1449         {
1450             rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1451         }
1452
1453         cycles = wallcycle_stop(wcycle, ewcSTEP);
1454         if (DOMAINDECOMP(cr) && wcycle)
1455         {
1456             dd_cycles_add(cr->dd, cycles, ddCyclStep);
1457         }
1458
1459         /* increase the MD step number */
1460         step++;
1461         step_rel++;
1462
1463         resetHandler->resetCounters(
1464                 step, step_rel, mdlog, fplog, cr, fr->nbv.get(),
1465                 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1466
1467         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1468         imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1469
1470     }
1471     /* End of main MD loop */
1472
1473     /* Closing TNG files can include compressing data. Therefore it is good to do that
1474      * before stopping the time measurements. */
1475     mdoutf_tng_close(outf);
1476
1477     /* Stop measuring walltime */
1478     walltime_accounting_end_time(walltime_accounting);
1479
1480     if (!thisRankHasDuty(cr, DUTY_PME))
1481     {
1482         /* Tell the PME only node to finish */
1483         gmx_pme_send_finish(cr);
1484     }
1485
1486     if (MASTER(cr))
1487     {
1488         if (ir->nstcalcenergy > 0)
1489         {
1490             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE,
1491                                                fplog, step, t,
1492                                                eprAVER, fcd, groups, &(ir->opts), awh.get());
1493         }
1494     }
1495     done_mdoutf(outf);
1496
1497     if (bPMETune)
1498     {
1499         pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1500     }
1501
1502     done_shellfc(fplog, shellfc, step_rel);
1503
1504     if (useReplicaExchange && MASTER(cr))
1505     {
1506         print_replica_exchange_statistics(fplog, repl_ex);
1507     }
1508
1509     // Clean up swapcoords
1510     if (ir->eSwapCoords != eswapNO)
1511     {
1512         finish_swapcoords(ir->swap);
1513     }
1514
1515     walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1516
1517     global_stat_destroy(gstat);
1518
1519 }