added Verlet scheme and NxN non-bonded functionality
[alexxy/gromacs.git] / src / kernel / md.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  * 
4  *                This source code is part of
5  * 
6  *                 G   R   O   M   A   C   S
7  * 
8  *          GROningen MAchine for Chemical Simulations
9  * 
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  * 
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  * 
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  * 
31  * For more info, check our website at http://www.gromacs.org
32  * 
33  * And Hey:
34  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include "typedefs.h"
41 #include "smalloc.h"
42 #include "sysstuff.h"
43 #include "vec.h"
44 #include "statutil.h"
45 #include "vcm.h"
46 #include "mdebin.h"
47 #include "nrnb.h"
48 #include "calcmu.h"
49 #include "index.h"
50 #include "vsite.h"
51 #include "update.h"
52 #include "ns.h"
53 #include "trnio.h"
54 #include "xtcio.h"
55 #include "mdrun.h"
56 #include "md_support.h"
57 #include "confio.h"
58 #include "network.h"
59 #include "pull.h"
60 #include "xvgr.h"
61 #include "physics.h"
62 #include "names.h"
63 #include "xmdrun.h"
64 #include "ionize.h"
65 #include "disre.h"
66 #include "orires.h"
67 #include "pme.h"
68 #include "mdatoms.h"
69 #include "repl_ex.h"
70 #include "qmmm.h"
71 #include "mpelogging.h"
72 #include "domdec.h"
73 #include "domdec_network.h"
74 #include "partdec.h"
75 #include "topsort.h"
76 #include "coulomb.h"
77 #include "constr.h"
78 #include "shellfc.h"
79 #include "compute_io.h"
80 #include "mvdata.h"
81 #include "checkpoint.h"
82 #include "mtop_util.h"
83 #include "sighandler.h"
84 #include "txtdump.h"
85 #include "string2.h"
86 #include "pme_switch.h"
87 #include "bondf.h"
88 #include "membed.h"
89 #include "types/nlistheuristics.h"
90 #include "types/iteratedconstraints.h"
91 #include "nbnxn_cuda_data_mgmt.h"
92
93 #ifdef GMX_LIB_MPI
94 #include <mpi.h>
95 #endif
96 #ifdef GMX_THREAD_MPI
97 #include "tmpi.h"
98 #endif
99
100 #ifdef GMX_FAHCORE
101 #include "corewrap.h"
102 #endif
103
104 static void reset_all_counters(FILE *fplog,t_commrec *cr,
105                                gmx_large_int_t step,
106                                gmx_large_int_t *step_rel,t_inputrec *ir,
107                                gmx_wallcycle_t wcycle,t_nrnb *nrnb,
108                                gmx_runtime_t *runtime,
109                                nbnxn_cuda_ptr_t cu_nbv)
110 {
111     char sbuf[STEPSTRSIZE];
112
113     /* Reset all the counters related to performance over the run */
114     md_print_warn(cr,fplog,"step %s: resetting all time and cycle counters\n",
115                   gmx_step_str(step,sbuf));
116
117     if (cu_nbv)
118     {
119         nbnxn_cuda_reset_timings(cu_nbv);
120     }
121
122     wallcycle_stop(wcycle,ewcRUN);
123     wallcycle_reset_all(wcycle);
124     if (DOMAINDECOMP(cr))
125     {
126         reset_dd_statistics_counters(cr->dd);
127     }
128     init_nrnb(nrnb);
129     ir->init_step += *step_rel;
130     ir->nsteps    -= *step_rel;
131     *step_rel = 0;
132     wallcycle_start(wcycle,ewcRUN);
133     runtime_start(runtime);
134     print_date_and_time(fplog,cr->nodeid,"Restarted time",runtime);
135 }
136
137 double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
138              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
139              int nstglobalcomm,
140              gmx_vsite_t *vsite,gmx_constr_t constr,
141              int stepout,t_inputrec *ir,
142              gmx_mtop_t *top_global,
143              t_fcdata *fcd,
144              t_state *state_global,
145              t_mdatoms *mdatoms,
146              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
147              gmx_edsam_t ed,t_forcerec *fr,
148              int repl_ex_nst,int repl_ex_nex,int repl_ex_seed,gmx_membed_t membed,
149              real cpt_period,real max_hours,
150              const char *deviceOptions,
151              unsigned long Flags,
152              gmx_runtime_t *runtime)
153 {
154     gmx_mdoutf_t *outf;
155     gmx_large_int_t step,step_rel;
156     double     run_time;
157     double     t,t0,lam0[efptNR];
158     gmx_bool       bGStatEveryStep,bGStat,bCalcVir,bCalcEner;
159     gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
160                bFirstStep,bStateFromCP,bStateFromTPX,bInitStep,bLastStep,
161                bBornRadii,bStartingFromCpt;
162     gmx_bool   bDoDHDL=FALSE,bDoFEP=FALSE,bDoExpanded=FALSE;
163     gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
164                bForceUpdate=FALSE,bCPT;
165     int        mdof_flags;
166     gmx_bool       bMasterState;
167     int        force_flags,cglo_flags;
168     tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
169     int        i,m;
170     t_trxstatus *status;
171     rvec       mu_tot;
172     t_vcm      *vcm;
173     t_state    *bufstate=NULL;   
174     matrix     *scale_tot,pcoupl_mu,M,ebox;
175     gmx_nlheur_t nlh;
176     t_trxframe rerun_fr;
177     gmx_repl_ex_t repl_ex=NULL;
178     int        nchkpt=1;
179     gmx_localtop_t *top;        
180     t_mdebin *mdebin=NULL;
181     df_history_t df_history;
182     t_state    *state=NULL;
183     rvec       *f_global=NULL;
184     int        n_xtc=-1;
185     rvec       *x_xtc=NULL;
186     gmx_enerdata_t *enerd;
187     rvec       *f=NULL;
188     gmx_global_stat_t gstat;
189     gmx_update_t upd=NULL;
190     t_graph    *graph=NULL;
191     globsig_t   gs;
192     gmx_rng_t mcrng=NULL;
193     gmx_bool        bFFscan;
194     gmx_groups_t *groups;
195     gmx_ekindata_t *ekind, *ekind_save;
196     gmx_shellfc_t shellfc;
197     int         count,nconverged=0;
198     real        timestep=0;
199     double      tcount=0;
200     gmx_bool        bIonize=FALSE;
201     gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
202     gmx_bool        bAppend;
203     gmx_bool        bResetCountersHalfMaxH=FALSE;
204     gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
205     real        mu_aver=0,dvdl;
206     int         a0,a1,gnx=0,ii;
207     atom_id     *grpindex=NULL;
208     char        *grpname;
209     t_coupl_rec *tcr=NULL;
210     rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
211     matrix      boxcopy={{0}},lastbox;
212         tensor      tmpvir;
213         real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
214         real        vetanew = 0;
215     int         lamnew=0;
216     /* for FEP */
217     int         fep_state=0;
218     int         nstfep;
219     real        rate;
220     double      cycles;
221         real        saved_conserved_quantity = 0;
222     real        last_ekin = 0;
223         int         iter_i;
224         t_extmass   MassQ;
225     int         **trotter_seq; 
226     char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
227     int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
228     gmx_iterate_t iterate;
229     gmx_large_int_t multisim_nsteps=-1; /* number of steps to do  before first multisim 
230                                           simulation stops. If equal to zero, don't
231                                           communicate any more between multisims.*/
232     /* PME load balancing data for GPU kernels */
233     pme_switch_t    pme_switch=NULL;
234     double          cycles_pmes;
235     gmx_bool        bPMETuneTry=FALSE,bPMETuneRunning=FALSE;
236
237     if(MASTER(cr))
238     {
239         fprintf(stderr,
240                 "\n* WARNING * WARNING * WARNING * WARNING * WARNING * WARNING *\n"
241                 "We have just committed the new CPU detection code in this branch,\n"
242                 "and will commit new SSE/AVX kernels in a few days. However, this\n"
243                 "means that currently only the NxN kernels are accelerated!\n"
244                 "In the mean time, you might want to avoid production runs in 4.6.\n\n");
245     }
246
247 #ifdef GMX_FAHCORE
248     /* Temporary addition for FAHCORE checkpointing */
249     int chkpt_ret;
250 #endif
251     
252     /* Check for special mdrun options */
253     bRerunMD = (Flags & MD_RERUN);
254     bIonize  = (Flags & MD_IONIZE);
255     bFFscan  = (Flags & MD_FFSCAN);
256     bAppend  = (Flags & MD_APPENDFILES);
257     if (Flags & MD_RESETCOUNTERSHALFWAY)
258     {
259         if (ir->nsteps > 0)
260         {
261             /* Signal to reset the counters half the simulation steps. */
262             wcycle_set_reset_counters(wcycle,ir->nsteps/2);
263         }
264         /* Signal to reset the counters halfway the simulation time. */
265         bResetCountersHalfMaxH = (max_hours > 0);
266     }
267
268     /* md-vv uses averaged full step velocities for T-control 
269        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
270        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
271     bVV = EI_VV(ir->eI);
272     if (bVV) /* to store the initial velocities while computing virial */
273     {
274         snew(cbuf,top_global->natoms);
275     }
276     /* all the iteratative cases - only if there are constraints */ 
277     bIterations = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
278     bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
279     
280     if (bRerunMD)
281     {
282         /* Since we don't know if the frames read are related in any way,
283          * rebuild the neighborlist at every step.
284          */
285         ir->nstlist       = 1;
286         ir->nstcalcenergy = 1;
287         nstglobalcomm     = 1;
288     }
289
290     check_ir_old_tpx_versions(cr,fplog,ir,top_global);
291
292     nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
293     bGStatEveryStep = (nstglobalcomm == 1);
294
295     if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
296     {
297         fprintf(fplog,
298                 "To reduce the energy communication with nstlist = -1\n"
299                 "the neighbor list validity should not be checked at every step,\n"
300                 "this means that exact integration is not guaranteed.\n"
301                 "The neighbor list validity is checked after:\n"
302                 "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
303                 "In most cases this will result in exact integration.\n"
304                 "This reduces the energy communication by a factor of 2 to 3.\n"
305                 "If you want less energy communication, set nstlist > 3.\n\n");
306     }
307
308     if (bRerunMD || bFFscan)
309     {
310         ir->nstxtcout = 0;
311     }
312     groups = &top_global->groups;
313
314     /* Initial values */
315     init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
316             &(state_global->fep_state),lam0,
317             nrnb,top_global,&upd,
318             nfile,fnm,&outf,&mdebin,
319             force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
320
321     clear_mat(total_vir);
322     clear_mat(pres);
323     /* Energy terms and groups */
324     snew(enerd,1);
325     init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
326                   enerd);
327     if (DOMAINDECOMP(cr))
328     {
329         f = NULL;
330     }
331     else
332     {
333         snew(f,top_global->natoms);
334     }
335
336     /* lambda Monte carlo random number generator  */
337     if (ir->bExpanded)
338     {
339         mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
340     }
341     /* copy the state into df_history */
342     copy_df_history(&df_history,&state_global->dfhist);
343
344     /* Kinetic energy data */
345     snew(ekind,1);
346     init_ekindata(fplog,top_global,&(ir->opts),ekind);
347     /* needed for iteration of constraints */
348     snew(ekind_save,1);
349     init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
350     /* Copy the cos acceleration to the groups struct */    
351     ekind->cosacc.cos_accel = ir->cos_accel;
352
353     gstat = global_stat_init(ir);
354     debug_gmx();
355
356     /* Check for polarizable models and flexible constraints */
357     shellfc = init_shell_flexcon(fplog,
358                                  top_global,n_flexible_constraints(constr),
359                                  (ir->bContinuation || 
360                                   (DOMAINDECOMP(cr) && !MASTER(cr))) ?
361                                  NULL : state_global->x);
362
363     if (DEFORM(*ir))
364     {
365 #ifdef GMX_THREAD_MPI
366         tMPI_Thread_mutex_lock(&deform_init_box_mutex);
367 #endif
368         set_deform_reference_box(upd,
369                                  deform_init_init_step_tpx,
370                                  deform_init_box_tpx);
371 #ifdef GMX_THREAD_MPI
372         tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
373 #endif
374     }
375
376     {
377         double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
378         if ((io > 2000) && MASTER(cr))
379             fprintf(stderr,
380                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
381                     io);
382     }
383
384     if (DOMAINDECOMP(cr)) {
385         top = dd_init_local_top(top_global);
386
387         snew(state,1);
388         dd_init_local_state(cr->dd,state_global,state);
389
390         if (DDMASTER(cr->dd) && ir->nstfout) {
391             snew(f_global,state_global->natoms);
392         }
393     } else {
394         if (PAR(cr)) {
395             /* Initialize the particle decomposition and split the topology */
396             top = split_system(fplog,top_global,ir,cr);
397
398             pd_cg_range(cr,&fr->cg0,&fr->hcg);
399             pd_at_range(cr,&a0,&a1);
400         } else {
401             top = gmx_mtop_generate_local_top(top_global,ir);
402
403             a0 = 0;
404             a1 = top_global->natoms;
405         }
406
407         forcerec_set_excl_load(fr,top,cr);
408
409         state = partdec_init_local_state(cr,state_global);
410         f_global = f;
411
412         atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
413
414         if (vsite) {
415             set_vsite_top(vsite,top,mdatoms,cr);
416         }
417
418         if (ir->ePBC != epbcNONE && !fr->bMolPBC) {
419             graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
420         }
421
422         if (shellfc) {
423             make_local_shells(cr,mdatoms,shellfc);
424         }
425
426         init_bonded_thread_force_reduction(fr,&top->idef);
427
428         if (ir->pull && PAR(cr)) {
429             dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
430         }
431     }
432
433     if (DOMAINDECOMP(cr))
434     {
435         /* Distribute the charge groups over the nodes from the master node */
436         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
437                             state_global,top_global,ir,
438                             state,&f,mdatoms,top,fr,
439                             vsite,shellfc,constr,
440                             nrnb,wcycle,FALSE);
441
442     }
443
444     update_mdatoms(mdatoms,state->lambda[efptMASS]);
445
446     if (opt2bSet("-cpi",nfile,fnm))
447     {
448         bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr);
449     }
450     else
451     {
452         bStateFromCP = FALSE;
453     }
454
455     if (MASTER(cr))
456     {
457         if (bStateFromCP)
458         {
459             /* Update mdebin with energy history if appending to output files */
460             if ( Flags & MD_APPENDFILES )
461             {
462                 restore_energyhistory_from_state(mdebin,&state_global->enerhist);
463             }
464             else
465             {
466                 /* We might have read an energy history from checkpoint,
467                  * free the allocated memory and reset the counts.
468                  */
469                 done_energyhistory(&state_global->enerhist);
470                 init_energyhistory(&state_global->enerhist);
471             }
472         }
473         /* Set the initial energy history in state by updating once */
474         update_energyhistory(&state_global->enerhist,mdebin);
475     }   
476
477     if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) 
478     {
479         /* Set the random state if we read a checkpoint file */
480         set_stochd_state(upd,state);
481     }
482
483     if (state->flags & (1<<estMC_RNG))
484     {
485         set_mc_state(mcrng,state);
486     }
487
488     /* Initialize constraints */
489     if (constr) {
490         if (!DOMAINDECOMP(cr))
491             set_constraints(constr,top,ir,mdatoms,cr);
492     }
493
494     /* Check whether we have to GCT stuff */
495     bTCR = ftp2bSet(efGCT,nfile,fnm);
496     if (bTCR) {
497         if (MASTER(cr)) {
498             fprintf(stderr,"Will do General Coupling Theory!\n");
499         }
500         gnx = top_global->mols.nr;
501         snew(grpindex,gnx);
502         for(i=0; (i<gnx); i++) {
503             grpindex[i] = i;
504         }
505     }
506
507     if (repl_ex_nst > 0)
508     {
509         /* We need to be sure replica exchange can only occur
510          * when the energies are current */
511         check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
512                         "repl_ex_nst",&repl_ex_nst);
513         /* This check needs to happen before inter-simulation
514          * signals are initialized, too */
515     }
516     if (repl_ex_nst > 0 && MASTER(cr))
517     {
518         repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
519                                         repl_ex_nst,repl_ex_nex,repl_ex_seed);
520     }
521
522     /* PME tuning is only supported with GPUs or PME nodes and not with rerun */
523     if ((Flags & MD_TUNEPME) &&
524         EEL_PME(fr->eeltype) &&
525         fr->cutoff_scheme == ecutsVERLET &&
526         (fr->nbv->bUseGPU || !(cr->duty & DUTY_PME)) &&
527         !bRerunMD)
528     {
529         switch_pme_init(&pme_switch,ir,state->box,fr->ic,fr->pmedata);
530         cycles_pmes = 0;
531         if (cr->duty & DUTY_PME)
532         {
533             /* Start tuning right away, as we can't measure the load */
534             bPMETuneRunning = TRUE;
535         }
536         else
537         {
538             /* Separate PME nodes, we can measure the PP/PME load balance */
539             bPMETuneTry = TRUE;
540         }
541     }
542
543     if (!ir->bContinuation && !bRerunMD)
544     {
545         if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
546         {
547             /* Set the velocities of frozen particles to zero */
548             for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
549             {
550                 for(m=0; m<DIM; m++)
551                 {
552                     if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
553                     {
554                         state->v[i][m] = 0;
555                     }
556                 }
557             }
558         }
559
560         if (constr)
561         {
562             /* Constrain the initial coordinates and velocities */
563             do_constrain_first(fplog,constr,ir,mdatoms,state,f,
564                                graph,cr,nrnb,fr,top,shake_vir);
565         }
566         if (vsite)
567         {
568             /* Construct the virtual sites for the initial configuration */
569             construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
570                              top->idef.iparams,top->idef.il,
571                              fr->ePBC,fr->bMolPBC,graph,cr,state->box);
572         }
573     }
574
575     debug_gmx();
576   
577     /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and nstrepl_ex_nst*/
578     nstfep = ir->fepvals->nstdhdl;
579     if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
580     {
581         nstfep = ir->expandedvals->nstexpanded;
582     }
583     if (repl_ex_nst > 0 && repl_ex_nst > nstfep)
584     {
585         nstfep = repl_ex_nst;
586     }
587
588     /* I'm assuming we need global communication the first time! MRS */
589     cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
590                   | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM:0)
591                   | (bVV ? CGLO_PRESSURE:0)
592                   | (bVV ? CGLO_CONSTRAINT:0)
593                   | (bRerunMD ? CGLO_RERUNMD:0)
594                   | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
595     
596     bSumEkinhOld = FALSE;
597     compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
598                     NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
599                     constr,NULL,FALSE,state->box,
600                     top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
601     if (ir->eI == eiVVAK) {
602         /* a second call to get the half step temperature initialized as well */ 
603         /* we do the same call as above, but turn the pressure off -- internally to 
604            compute_globals, this is recognized as a velocity verlet half-step 
605            kinetic energy calculation.  This minimized excess variables, but 
606            perhaps loses some logic?*/
607         
608         compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
609                         NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
610                         constr,NULL,FALSE,state->box,
611                         top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
612                         cglo_flags &~ (CGLO_STOPCM | CGLO_PRESSURE));
613     }
614     
615     /* Calculate the initial half step temperature, and save the ekinh_old */
616     if (!(Flags & MD_STARTFROMCPT)) 
617     {
618         for(i=0; (i<ir->opts.ngtc); i++) 
619         {
620             copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
621         } 
622     }
623     if (ir->eI != eiVV) 
624     {
625         enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
626                                      and there is no previous step */
627     }
628     
629     /* if using an iterative algorithm, we need to create a working directory for the state. */
630     if (bIterations) 
631     {
632             bufstate = init_bufstate(state);
633     }
634     if (bFFscan) 
635     {
636         snew(xcopy,state->natoms);
637         snew(vcopy,state->natoms);
638         copy_rvecn(state->x,xcopy,0,state->natoms);
639         copy_rvecn(state->v,vcopy,0,state->natoms);
640         copy_mat(state->box,boxcopy);
641     } 
642     
643     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
644        temperature control */
645     trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
646     
647     if (MASTER(cr))
648     {
649         if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
650         {
651             fprintf(fplog,
652                     "RMS relative constraint deviation after constraining: %.2e\n",
653                     constr_rmsd(constr,FALSE));
654         }
655         if (EI_STATE_VELOCITY(ir->eI))
656         {
657             fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
658         }
659         if (bRerunMD)
660         {
661             fprintf(stderr,"starting md rerun '%s', reading coordinates from"
662                     " input trajectory '%s'\n\n",
663                     *(top_global->name),opt2fn("-rerun",nfile,fnm));
664             if (bVerbose)
665             {
666                 fprintf(stderr,"Calculated time to finish depends on nsteps from "
667                         "run input file,\nwhich may not correspond to the time "
668                         "needed to process input trajectory.\n\n");
669             }
670         }
671         else
672         {
673             char tbuf[20];
674             fprintf(stderr,"starting mdrun '%s'\n",
675                     *(top_global->name));
676             if (ir->nsteps >= 0)
677             {
678                 sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
679             }
680             else
681             {
682                 sprintf(tbuf,"%s","infinite");
683             }
684             if (ir->init_step > 0)
685             {
686                 fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
687                         gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
688                         gmx_step_str(ir->init_step,sbuf2),
689                         ir->init_step*ir->delta_t);
690             }
691             else
692             {
693                 fprintf(stderr,"%s steps, %s ps.\n",
694                         gmx_step_str(ir->nsteps,sbuf),tbuf);
695             }
696         }
697         fprintf(fplog,"\n");
698     }
699
700     /* Set and write start time */
701     runtime_start(runtime);
702     print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
703     wallcycle_start(wcycle,ewcRUN);
704     if (fplog)
705     {
706         fprintf(fplog,"\n");
707     }
708
709     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
710 #ifdef GMX_FAHCORE
711     chkpt_ret=fcCheckPointParallel( cr->nodeid,
712                                     NULL,0);
713     if ( chkpt_ret == 0 ) 
714         gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
715 #endif
716
717     debug_gmx();
718     /***********************************************************
719      *
720      *             Loop over MD steps 
721      *
722      ************************************************************/
723
724     /* if rerunMD then read coordinates and velocities from input trajectory */
725     if (bRerunMD)
726     {
727         if (getenv("GMX_FORCE_UPDATE"))
728         {
729             bForceUpdate = TRUE;
730         }
731
732         rerun_fr.natoms = 0;
733         if (MASTER(cr))
734         {
735             bNotLastFrame = read_first_frame(oenv,&status,
736                                              opt2fn("-rerun",nfile,fnm),
737                                              &rerun_fr,TRX_NEED_X | TRX_READ_V);
738             if (rerun_fr.natoms != top_global->natoms)
739             {
740                 gmx_fatal(FARGS,
741                           "Number of atoms in trajectory (%d) does not match the "
742                           "run input file (%d)\n",
743                           rerun_fr.natoms,top_global->natoms);
744             }
745             if (ir->ePBC != epbcNONE)
746             {
747                 if (!rerun_fr.bBox)
748                 {
749                     gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
750                 }
751                 if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
752                 {
753                     gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
754                 }
755             }
756         }
757
758         if (PAR(cr))
759         {
760             rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
761         }
762
763         if (ir->ePBC != epbcNONE)
764         {
765             /* Set the shift vectors.
766              * Necessary here when have a static box different from the tpr box.
767              */
768             calc_shifts(rerun_fr.box,fr->shift_vec);
769         }
770     }
771
772     /* loop over MD steps or if rerunMD to end of input trajectory */
773     bFirstStep = TRUE;
774     /* Skip the first Nose-Hoover integration when we get the state from tpx */
775     bStateFromTPX = !bStateFromCP;
776     bInitStep = bFirstStep && (bStateFromTPX || bVV);
777     bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
778     bLastStep    = FALSE;
779     bSumEkinhOld = FALSE;
780     bExchanged   = FALSE;
781
782     init_global_signals(&gs,cr,ir,repl_ex_nst);
783
784     step = ir->init_step;
785     step_rel = 0;
786
787     if (ir->nstlist == -1)
788     {
789         init_nlistheuristics(&nlh,bGStatEveryStep,step);
790     }
791
792     if (MULTISIM(cr) && (repl_ex_nst <=0 ))
793     {
794         /* check how many steps are left in other sims */
795         multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
796     }
797
798
799     /* and stop now if we should */
800     bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
801                  ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
802     while (!bLastStep || (bRerunMD && bNotLastFrame)) {
803
804         wallcycle_start(wcycle,ewcSTEP);
805
806         GMX_MPE_LOG(ev_timestep1);
807
808         if (bRerunMD) {
809             if (rerun_fr.bStep) {
810                 step = rerun_fr.step;
811                 step_rel = step - ir->init_step;
812             }
813             if (rerun_fr.bTime) {
814                 t = rerun_fr.time;
815             }
816             else
817             {
818                 t = step;
819             }
820         } 
821         else 
822         {
823             bLastStep = (step_rel == ir->nsteps);
824             t = t0 + step*ir->delta_t;
825         }
826
827         if (ir->efep != efepNO || ir->bSimTemp)
828             {
829             /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
830                requiring different logic. */
831             
832             set_current_lambdas(step,ir->fepvals,bRerunMD,&rerun_fr,state_global,state,lam0);
833             bDoDHDL = do_per_step(step,ir->fepvals->nstdhdl);
834             bDoFEP  = (do_per_step(step,nstfep) && (ir->efep != efepNO));
835             bDoExpanded  = (do_per_step(step,ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
836         }
837
838         if (bSimAnn) 
839         {
840             update_annealing_target_temp(&(ir->opts),t);
841         }
842
843         if (bRerunMD)
844         {
845             if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
846             {
847                 for(i=0; i<state_global->natoms; i++)
848                 {
849                     copy_rvec(rerun_fr.x[i],state_global->x[i]);
850                 }
851                 if (rerun_fr.bV)
852                 {
853                     for(i=0; i<state_global->natoms; i++)
854                     {
855                         copy_rvec(rerun_fr.v[i],state_global->v[i]);
856                     }
857                 }
858                 else
859                 {
860                     for(i=0; i<state_global->natoms; i++)
861                     {
862                         clear_rvec(state_global->v[i]);
863                     }
864                     if (bRerunWarnNoV)
865                     {
866                         fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
867                                 "         Ekin, temperature and pressure are incorrect,\n"
868                                 "         the virial will be incorrect when constraints are present.\n"
869                                 "\n");
870                         bRerunWarnNoV = FALSE;
871                     }
872                 }
873             }
874             copy_mat(rerun_fr.box,state_global->box);
875             copy_mat(state_global->box,state->box);
876
877             if (vsite && (Flags & MD_RERUN_VSITE))
878             {
879                 if (DOMAINDECOMP(cr))
880                 {
881                     gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
882                 }
883                 if (graph)
884                 {
885                     /* Following is necessary because the graph may get out of sync
886                      * with the coordinates if we only have every N'th coordinate set
887                      */
888                     mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
889                     shift_self(graph,state->box,state->x);
890                 }
891                 construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
892                                  top->idef.iparams,top->idef.il,
893                                  fr->ePBC,fr->bMolPBC,graph,cr,state->box);
894                 if (graph)
895                 {
896                     unshift_self(graph,state->box,state->x);
897                 }
898             }
899         }
900
901         /* Stop Center of Mass motion */
902         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
903
904         /* Copy back starting coordinates in case we're doing a forcefield scan */
905         if (bFFscan)
906         {
907             for(ii=0; (ii<state->natoms); ii++)
908             {
909                 copy_rvec(xcopy[ii],state->x[ii]);
910                 copy_rvec(vcopy[ii],state->v[ii]);
911             }
912             copy_mat(boxcopy,state->box);
913         }
914
915         if (bRerunMD)
916         {
917             /* for rerun MD always do Neighbour Searching */
918             bNS = (bFirstStep || ir->nstlist != 0);
919             bNStList = bNS;
920         }
921         else
922         {
923             /* Determine whether or not to do Neighbour Searching and LR */
924             bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
925             
926             bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
927                    (ir->nstlist == -1 && nlh.nabnsb > 0));
928
929             if (bNS && ir->nstlist == -1)
930             {
931                 set_nlistheuristics(&nlh,bFirstStep || bExchanged || bDoFEP, step);
932             }
933         } 
934
935         /* check whether we should stop because another simulation has 
936            stopped. */
937         if (MULTISIM(cr))
938         {
939             if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
940                  (multisim_nsteps != ir->nsteps) )  
941             {
942                 if (bNS)
943                 {
944                     if (MASTER(cr))
945                     {
946                         fprintf(stderr, 
947                                 "Stopping simulation %d because another one has finished\n",
948                                 cr->ms->sim);
949                     }
950                     bLastStep=TRUE;
951                     gs.sig[eglsCHKPT] = 1;
952                 }
953             }
954         }
955
956         /* < 0 means stop at next step, > 0 means stop at next NS step */
957         if ( (gs.set[eglsSTOPCOND] < 0 ) ||
958              ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
959         {
960             bLastStep = TRUE;
961         }
962
963         /* Determine whether or not to update the Born radii if doing GB */
964         bBornRadii=bFirstStep;
965         if (ir->implicit_solvent && (step % ir->nstgbradii==0))
966         {
967             bBornRadii=TRUE;
968         }
969         
970         do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
971         do_verbose = bVerbose &&
972                   (step % stepout == 0 || bFirstStep || bLastStep);
973
974         if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
975         {
976             if (bRerunMD)
977             {
978                 bMasterState = TRUE;
979             }
980             else
981             {
982                 bMasterState = FALSE;
983                 /* Correct the new box if it is too skewed */
984                 if (DYNAMIC_BOX(*ir))
985                 {
986                     if (correct_box(fplog,step,state->box,graph))
987                     {
988                         bMasterState = TRUE;
989                     }
990                 }
991                 if (DOMAINDECOMP(cr) && bMasterState)
992                 {
993                     dd_collect_state(cr->dd,state,state_global);
994                 }
995             }
996
997             if (DOMAINDECOMP(cr))
998             {
999                 /* Repartition the domain decomposition */
1000                 wallcycle_start(wcycle,ewcDOMDEC);
1001                 dd_partition_system(fplog,step,cr,
1002                                     bMasterState,nstglobalcomm,
1003                                     state_global,top_global,ir,
1004                                     state,&f,mdatoms,top,fr,
1005                                     vsite,shellfc,constr,
1006                                     nrnb,wcycle,
1007                                     do_verbose && !bPMETuneRunning);
1008                 wallcycle_stop(wcycle,ewcDOMDEC);
1009                 /* If using an iterative integrator, reallocate space to match the decomposition */
1010             }
1011         }
1012
1013         if (MASTER(cr) && do_log && !bFFscan)
1014         {
1015             print_ebin_header(fplog,step,t,state->lambda[efptFEP]); /* can we improve the information printed here? */
1016         }
1017
1018         if (ir->efep != efepNO)
1019         {
1020             update_mdatoms(mdatoms,state->lambda[efptMASS]);
1021         }
1022
1023         if ((bRerunMD && rerun_fr.bV) || bExchanged)
1024         {
1025             
1026             /* We need the kinetic energy at minus the half step for determining
1027              * the full step kinetic energy and possibly for T-coupling.*/
1028             /* This may not be quite working correctly yet . . . . */
1029             compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1030                             wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
1031                             constr,NULL,FALSE,state->box,
1032                             top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1033                             CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
1034         }
1035         clear_mat(force_vir);
1036         
1037         /* Ionize the atoms if necessary */
1038         if (bIonize)
1039         {
1040             ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
1041                    mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
1042         }
1043         
1044         /* Update force field in ffscan program */
1045         if (bFFscan)
1046         {
1047             if (update_forcefield(fplog,
1048                                   nfile,fnm,fr,
1049                                   mdatoms->nr,state->x,state->box))
1050             {
1051                 gmx_finalize_par();
1052
1053                 exit(0);
1054             }
1055         }
1056
1057         GMX_MPE_LOG(ev_timestep2);
1058
1059         /* We write a checkpoint at this MD step when:
1060          * either at an NS step when we signalled through gs,
1061          * or at the last step (but not when we do not want confout),
1062          * but never at the first step or with rerun.
1063          */
1064         bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
1065                  (bLastStep && (Flags & MD_CONFOUT))) &&
1066                 step > ir->init_step && !bRerunMD);
1067         if (bCPT)
1068         {
1069             gs.set[eglsCHKPT] = 0;
1070         }
1071
1072         /* Determine the energy and pressure:
1073          * at nstcalcenergy steps and at energy output steps (set below).
1074          */
1075         if (EI_VV(ir->eI) && (!bInitStep))
1076         {
1077             /* for vv, the first half actually corresponds to the last step */
1078             bCalcEner = do_per_step(step-1,ir->nstcalcenergy);
1079         }
1080         else
1081         {
1082             bCalcEner = do_per_step(step,ir->nstcalcenergy);
1083         }
1084         bCalcVir = bCalcEner ||
1085             (ir->epc != epcNO && do_per_step(step,ir->nstpcouple));
1086
1087         /* Do we need global communication ? */
1088         bGStat = (bCalcVir || bCalcEner || bStopCM ||
1089                   do_per_step(step,nstglobalcomm) ||
1090                   (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
1091
1092         do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
1093
1094         if (do_ene || do_log)
1095         {
1096             bCalcVir  = TRUE;
1097             bCalcEner = TRUE;
1098             bGStat    = TRUE;
1099         }
1100         
1101         /* these CGLO_ options remain the same throughout the iteration */
1102         cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
1103                       (bGStat ? CGLO_GSTAT : 0)
1104             );
1105         
1106         force_flags = (GMX_FORCE_STATECHANGED |
1107                        ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
1108                        GMX_FORCE_ALLFORCES |
1109                        (bNStList ? GMX_FORCE_DOLR : 0) |
1110                        GMX_FORCE_SEPLRF |
1111                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
1112                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
1113                        (bDoFEP ? GMX_FORCE_DHDL : 0)
1114             );
1115         
1116         if (shellfc)
1117         {
1118             /* Now is the time to relax the shells */
1119             count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
1120                                       ir,bNS,force_flags,
1121                                       bStopCM,top,top_global,
1122                                       constr,enerd,fcd,
1123                                       state,f,force_vir,mdatoms,
1124                                       nrnb,wcycle,graph,groups,
1125                                       shellfc,fr,bBornRadii,t,mu_tot,
1126                                       state->natoms,&bConverged,vsite,
1127                                       outf->fp_field);
1128             tcount+=count;
1129
1130             if (bConverged)
1131             {
1132                 nconverged++;
1133             }
1134         }
1135         else
1136         {
1137             /* The coordinates (x) are shifted (to get whole molecules)
1138              * in do_force.
1139              * This is parallellized as well, and does communication too. 
1140              * Check comments in sim_util.c
1141              */
1142              do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
1143                      state->box,state->x,&state->hist,
1144                      f,force_vir,mdatoms,enerd,fcd,
1145                      state->lambda,graph,
1146                      fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
1147                      (bNS ? GMX_FORCE_NS : 0) | force_flags);
1148         }
1149     
1150         GMX_BARRIER(cr->mpi_comm_mygroup);
1151         
1152         if (bTCR)
1153         {
1154             mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
1155                                    mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
1156         }
1157         
1158         if (bTCR && bFirstStep)
1159         {
1160             tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
1161             fprintf(fplog,"Done init_coupling\n"); 
1162             fflush(fplog);
1163         }
1164         
1165         if (bVV && !bStartingFromCpt && !bRerunMD)
1166         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
1167         {
1168             if (ir->eI==eiVV && bInitStep) 
1169             {
1170                 /* if using velocity verlet with full time step Ekin,
1171                  * take the first half step only to compute the 
1172                  * virial for the first step. From there,
1173                  * revert back to the initial coordinates
1174                  * so that the input is actually the initial step.
1175                  */
1176                 copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
1177             } else {
1178                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
1179                 trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
1180             }
1181
1182             update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,
1183                           f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
1184                           ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
1185                           cr,nrnb,constr,&top->idef);
1186             
1187             if (bIterations)
1188             {
1189                 gmx_iterate_init(&iterate,bIterations && !bInitStep);
1190             }
1191             /* for iterations, we save these vectors, as we will be self-consistently iterating
1192                the calculations */
1193
1194             /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
1195             
1196             /* save the state */
1197             if (bIterations && iterate.bIterate) { 
1198                 copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
1199             }
1200             
1201             bFirstIterate = TRUE;
1202             while (bFirstIterate || (bIterations && iterate.bIterate))
1203             {
1204                 if (bIterations && iterate.bIterate) 
1205                 {
1206                     copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
1207                     if (bFirstIterate && bTrotter) 
1208                     {
1209                         /* The first time through, we need a decent first estimate
1210                            of veta(t+dt) to compute the constraints.  Do
1211                            this by computing the box volume part of the
1212                            trotter integration at this time. Nothing else
1213                            should be changed by this routine here.  If
1214                            !(first time), we start with the previous value
1215                            of veta.  */
1216                         
1217                         veta_save = state->veta;
1218                         trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
1219                         vetanew = state->veta;
1220                         state->veta = veta_save;
1221                     } 
1222                 } 
1223                 
1224                 bOK = TRUE;
1225                 if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
1226                     dvdl = 0;
1227                     
1228                     update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
1229                                        state,fr->bMolPBC,graph,f,
1230                                        &top->idef,shake_vir,NULL,
1231                                        cr,nrnb,wcycle,upd,constr,
1232                                        bInitStep,TRUE,bCalcVir,vetanew);
1233                     
1234                     if (!bOK && !bFFscan)
1235                     {
1236                         gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
1237                     }
1238                     
1239                 } 
1240                 else if (graph)
1241                 { /* Need to unshift here if a do_force has been
1242                      called in the previous step */
1243                     unshift_self(graph,state->box,state->x);
1244                 }
1245                 
1246                 
1247                 /* if VV, compute the pressure and constraints */
1248                 /* For VV2, we strictly only need this if using pressure
1249                  * control, but we really would like to have accurate pressures
1250                  * printed out.
1251                  * Think about ways around this in the future?
1252                  * For now, keep this choice in comments.
1253                  */
1254                 /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
1255                     /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
1256                 bPres = TRUE;
1257                 bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
1258                 if (bCalcEner && ir->eI==eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
1259                 {
1260                     bSumEkinhOld = TRUE;
1261                 }
1262                 compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1263                                 wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
1264                                 constr,NULL,FALSE,state->box,
1265                                 top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1266                                 cglo_flags 
1267                                 | CGLO_ENERGY 
1268                                 | (bStopCM ? CGLO_STOPCM : 0)
1269                                 | (bTemp ? CGLO_TEMPERATURE:0) 
1270                                 | (bPres ? CGLO_PRESSURE : 0) 
1271                                 | (bPres ? CGLO_CONSTRAINT : 0)
1272                                 | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)  
1273                                 | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
1274                                 | CGLO_SCALEEKIN 
1275                     );
1276                 /* explanation of above: 
1277                    a) We compute Ekin at the full time step
1278                    if 1) we are using the AveVel Ekin, and it's not the
1279                    initial step, or 2) if we are using AveEkin, but need the full
1280                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1281                    b) If we are using EkinAveEkin for the kinetic energy for the temperture control, we still feed in 
1282                    EkinAveVel because it's needed for the pressure */
1283                 
1284                 /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1285                 if (!bInitStep) 
1286                 {
1287                     if (bTrotter)
1288                     {
1289                         trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
1290                     } 
1291                     else 
1292                     {
1293                         if (bExchanged)
1294                         {
1295             
1296                             /* We need the kinetic energy at minus the half step for determining
1297                              * the full step kinetic energy and possibly for T-coupling.*/
1298                             /* This may not be quite working correctly yet . . . . */
1299                             compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1300                                             wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
1301                                             constr,NULL,FALSE,state->box,
1302                                             top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1303                                             CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
1304                         }
1305
1306
1307                         update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
1308                     }
1309                 }
1310                 
1311                 if (bIterations &&
1312                     done_iterating(cr,fplog,step,&iterate,bFirstIterate,
1313                                    state->veta,&vetanew)) 
1314                 {
1315                     break;
1316                 }
1317                 bFirstIterate = FALSE;
1318             }
1319
1320             if (bTrotter && !bInitStep) {
1321                 enerd->term[F_DVDL_BONDED] += dvdl;        /* only add after iterations */
1322                 copy_mat(shake_vir,state->svir_prev);
1323                 copy_mat(force_vir,state->fvir_prev);
1324                 if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
1325                     /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1326                     enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
1327                     enerd->term[F_EKIN] = trace(ekind->ekin);
1328                 }
1329             }
1330             /* if it's the initial step, we performed this first step just to get the constraint virial */
1331             if (bInitStep && ir->eI==eiVV) {
1332                 copy_rvecn(cbuf,state->v,0,state->natoms);
1333             }
1334             
1335             if (fr->bSepDVDL && fplog && do_log) 
1336             {
1337                 fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
1338             }
1339             enerd->term[F_DVDL_BONDED] += dvdl;
1340             
1341             GMX_MPE_LOG(ev_timestep1);
1342         }
1343     
1344         /* MRS -- now done iterating -- compute the conserved quantity */
1345         if (bVV) {
1346             saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
1347             if (ir->eI==eiVV) 
1348             {
1349                 last_ekin = enerd->term[F_EKIN];
1350             }
1351             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
1352             {
1353                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1354             }
1355             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1356             sum_dhdl(enerd,state->lambda,ir->fepvals);
1357         }
1358         
1359         /* ########  END FIRST UPDATE STEP  ############## */
1360         /* ########  If doing VV, we now have v(dt) ###### */
1361         if (bDoExpanded) {
1362             /* perform extended ensemble sampling in lambda - we don't
1363                actually move to the new state before outputting
1364                statistics, but if performing simulated tempering, we
1365                do update the velocities and the tau_t. */
1366         
1367             lamnew = ExpandedEnsembleDynamics(fplog,ir,enerd,state,&MassQ,&df_history,step,mcrng,state->v,mdatoms);
1368         }
1369         /* ################## START TRAJECTORY OUTPUT ################# */
1370         
1371         /* Now we have the energies and forces corresponding to the 
1372          * coordinates at time t. We must output all of this before
1373          * the update.
1374          * for RerunMD t is read from input trajectory
1375          */
1376         GMX_MPE_LOG(ev_output_start);
1377
1378         mdof_flags = 0;
1379         if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
1380         if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
1381         if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
1382         if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
1383         if (bCPT) { mdof_flags |= MDOF_CPT; };
1384
1385 #if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
1386         if (bLastStep)
1387         {
1388             /* Enforce writing positions and velocities at end of run */
1389             mdof_flags |= (MDOF_X | MDOF_V);
1390         }
1391 #endif
1392 #ifdef GMX_FAHCORE
1393         if (MASTER(cr))
1394             fcReportProgress( ir->nsteps, step );
1395
1396         /* sync bCPT and fc record-keeping */
1397         if (bCPT && MASTER(cr))
1398             fcRequestCheckPoint();
1399 #endif
1400         
1401         if (mdof_flags != 0)
1402         {
1403             wallcycle_start(wcycle,ewcTRAJ);
1404             if (bCPT)
1405             {
1406                 if (state->flags & (1<<estLD_RNG))
1407                 {
1408                     get_stochd_state(upd,state);
1409                 }
1410                 if (state->flags  & (1<<estMC_RNG))
1411                 {
1412                     get_mc_state(mcrng,state);
1413                 }
1414                 if (MASTER(cr))
1415                 {
1416                     if (bSumEkinhOld)
1417                     {
1418                         state_global->ekinstate.bUpToDate = FALSE;
1419                     }
1420                     else
1421                     {
1422                         update_ekinstate(&state_global->ekinstate,ekind);
1423                         state_global->ekinstate.bUpToDate = TRUE;
1424                     }
1425                     update_energyhistory(&state_global->enerhist,mdebin);
1426                     if (ir->efep!=efepNO || ir->bSimTemp) 
1427                     {
1428                         state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
1429                                                                        structured so this isn't necessary.
1430                                                                        Note this reassignment is only necessary
1431                                                                        for single threads.*/
1432                         copy_df_history(&state_global->dfhist,&df_history);
1433                     }
1434                 }
1435             }
1436             write_traj(fplog,cr,outf,mdof_flags,top_global,
1437                        step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
1438             if (bCPT)
1439             {
1440                 nchkpt++;
1441                 bCPT = FALSE;
1442             }
1443             debug_gmx();
1444             if (bLastStep && step_rel == ir->nsteps &&
1445                 (Flags & MD_CONFOUT) && MASTER(cr) &&
1446                 !bRerunMD && !bFFscan)
1447             {
1448                 /* x and v have been collected in write_traj,
1449                  * because a checkpoint file will always be written
1450                  * at the last step.
1451                  */
1452                 fprintf(stderr,"\nWriting final coordinates.\n");
1453                 if (fr->bMolPBC)
1454                 {
1455                     /* Make molecules whole only for confout writing */
1456                     do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
1457                 }
1458                 write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
1459                                     *top_global->name,top_global,
1460                                     state_global->x,state_global->v,
1461                                     ir->ePBC,state->box);
1462                 debug_gmx();
1463             }
1464             wallcycle_stop(wcycle,ewcTRAJ);
1465         }
1466         GMX_MPE_LOG(ev_output_finish);
1467         
1468         /* kludge -- virial is lost with restart for NPT control. Must restart */
1469         if (bStartingFromCpt && bVV) 
1470         {
1471             copy_mat(state->svir_prev,shake_vir);
1472             copy_mat(state->fvir_prev,force_vir);
1473         }
1474         /*  ################## END TRAJECTORY OUTPUT ################ */
1475         
1476         /* Determine the wallclock run time up till now */
1477         run_time = gmx_gettime() - (double)runtime->real;
1478
1479         /* Check whether everything is still allright */    
1480         if (((int)gmx_get_stop_condition() > handled_stop_condition)
1481 #ifdef GMX_THREAD_MPI
1482             && MASTER(cr)
1483 #endif
1484             )
1485         {
1486             /* this is just make gs.sig compatible with the hack 
1487                of sending signals around by MPI_Reduce with together with
1488                other floats */
1489             if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
1490                 gs.sig[eglsSTOPCOND]=1;
1491             if ( gmx_get_stop_condition() == gmx_stop_cond_next )
1492                 gs.sig[eglsSTOPCOND]=-1;
1493             /* < 0 means stop at next step, > 0 means stop at next NS step */
1494             if (fplog)
1495             {
1496                 fprintf(fplog,
1497                         "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
1498                         gmx_get_signal_name(),
1499                         gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
1500                 fflush(fplog);
1501             }
1502             fprintf(stderr,
1503                     "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
1504                     gmx_get_signal_name(),
1505                     gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
1506             fflush(stderr);
1507             handled_stop_condition=(int)gmx_get_stop_condition();
1508         }
1509         else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
1510                  (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
1511                  gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
1512         {
1513             /* Signal to terminate the run */
1514             gs.sig[eglsSTOPCOND] = 1;
1515             if (fplog)
1516             {
1517                 fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
1518             }
1519             fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
1520         }
1521
1522         if (bResetCountersHalfMaxH && MASTER(cr) &&
1523             run_time > max_hours*60.0*60.0*0.495)
1524         {
1525             gs.sig[eglsRESETCOUNTERS] = 1;
1526         }
1527
1528         if (ir->nstlist == -1 && !bRerunMD)
1529         {
1530             /* When bGStatEveryStep=FALSE, global_stat is only called
1531              * when we check the atom displacements, not at NS steps.
1532              * This means that also the bonded interaction count check is not
1533              * performed immediately after NS. Therefore a few MD steps could
1534              * be performed with missing interactions.
1535              * But wrong energies are never written to file,
1536              * since energies are only written after global_stat
1537              * has been called.
1538              */
1539             if (step >= nlh.step_nscheck)
1540             {
1541                 nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
1542                                                      nlh.scale_tot,state->x);
1543             }
1544             else
1545             {
1546                 /* This is not necessarily true,
1547                  * but step_nscheck is determined quite conservatively.
1548                  */
1549                 nlh.nabnsb = 0;
1550             }
1551         }
1552
1553         /* In parallel we only have to check for checkpointing in steps
1554          * where we do global communication,
1555          *  otherwise the other nodes don't know.
1556          */
1557         if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
1558                            cpt_period >= 0 &&
1559                            (cpt_period == 0 || 
1560                             run_time >= nchkpt*cpt_period*60.0)) &&
1561             gs.set[eglsCHKPT] == 0)
1562         {
1563             gs.sig[eglsCHKPT] = 1;
1564         }
1565   
1566
1567         /* at the start of step, randomize the velocities */
1568         if (ETC_ANDERSEN(ir->etc) && EI_VV(ir->eI))
1569         {
1570             gmx_bool bDoAndersenConstr;
1571             bDoAndersenConstr = (constr && update_randomize_velocities(ir,step,mdatoms,state,upd,&top->idef,constr));
1572             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1573             if (bDoAndersenConstr)
1574             {
1575                 update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
1576                                    state,fr->bMolPBC,graph,f,
1577                                    &top->idef,tmp_vir,NULL,
1578                                    cr,nrnb,wcycle,upd,constr,
1579                                    bInitStep,TRUE,bCalcVir,vetanew);
1580             }
1581         }
1582
1583         if (bIterations)
1584         {
1585             gmx_iterate_init(&iterate,bIterations);
1586         }
1587     
1588         /* for iterations, we save these vectors, as we will be redoing the calculations */
1589         if (bIterations && iterate.bIterate) 
1590         {
1591             copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
1592         }
1593         bFirstIterate = TRUE;
1594         while (bFirstIterate || (bIterations && iterate.bIterate))
1595         {
1596             /* We now restore these vectors to redo the calculation with improved extended variables */    
1597             if (bIterations) 
1598             { 
1599                 copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
1600             }
1601
1602             /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
1603                so scroll down for that logic */
1604             
1605             /* #########   START SECOND UPDATE STEP ################# */
1606             GMX_MPE_LOG(ev_update_start);
1607             /* Box is changed in update() when we do pressure coupling,
1608              * but we should still use the old box for energy corrections and when
1609              * writing it to the energy file, so it matches the trajectory files for
1610              * the same timestep above. Make a copy in a separate array.
1611              */
1612             copy_mat(state->box,lastbox);
1613
1614             bOK = TRUE;
1615             if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
1616             {
1617                 wallcycle_start(wcycle,ewcUPDATE);
1618                 dvdl = 0;
1619                 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1620                 if (bTrotter) 
1621                 {
1622                     if (bIterations && iterate.bIterate) 
1623                     {
1624                         if (bFirstIterate) 
1625                         {
1626                             scalevir = 1;
1627                         }
1628                         else 
1629                         {
1630                             /* we use a new value of scalevir to converge the iterations faster */
1631                             scalevir = tracevir/trace(shake_vir);
1632                         }
1633                         msmul(shake_vir,scalevir,shake_vir); 
1634                         m_add(force_vir,shake_vir,total_vir);
1635                         clear_mat(shake_vir);
1636                     }
1637                     trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
1638                 /* We can only do Berendsen coupling after we have summed
1639                  * the kinetic energy or virial. Since the happens
1640                  * in global_state after update, we should only do it at
1641                  * step % nstlist = 1 with bGStatEveryStep=FALSE.
1642                  */
1643                 }
1644                 else 
1645                 {
1646                     update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
1647                     update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
1648                                    upd,bInitStep);
1649                 }
1650
1651                 if (bVV)
1652                 {
1653                     /* velocity half-step update */
1654                     update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
1655                                   fr->bTwinRange && bNStList,fr->f_twin,fcd,
1656                                   ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
1657                                   cr,nrnb,constr,&top->idef);
1658                 }
1659
1660                 /* Above, initialize just copies ekinh into ekin,
1661                  * it doesn't copy position (for VV),
1662                  * and entire integrator for MD.
1663                  */
1664                 
1665                 if (ir->eI==eiVVAK) 
1666                 {
1667                     copy_rvecn(state->x,cbuf,0,state->natoms);
1668                 }
1669                 
1670                 update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
1671                               fr->bTwinRange && bNStList,fr->f_twin,fcd,
1672                               ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
1673                 wallcycle_stop(wcycle,ewcUPDATE);
1674
1675                 update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,
1676                                    fr->bMolPBC,graph,f,
1677                                    &top->idef,shake_vir,force_vir,
1678                                    cr,nrnb,wcycle,upd,constr,
1679                                    bInitStep,FALSE,bCalcVir,state->veta);  
1680                 
1681                 if (ir->eI==eiVVAK) 
1682                 {
1683                     /* erase F_EKIN and F_TEMP here? */
1684                     /* just compute the kinetic energy at the half step to perform a trotter step */
1685                     compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1686                                     wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
1687                                     constr,NULL,FALSE,lastbox,
1688                                     top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1689                                     cglo_flags | CGLO_TEMPERATURE    
1690                         );
1691                     wallcycle_start(wcycle,ewcUPDATE);
1692                     trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
1693                     /* now we know the scaling, we can compute the positions again again */
1694                     copy_rvecn(cbuf,state->x,0,state->natoms);
1695
1696                     update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
1697                                   fr->bTwinRange && bNStList,fr->f_twin,fcd,
1698                                   ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
1699                     wallcycle_stop(wcycle,ewcUPDATE);
1700
1701                     /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
1702                     /* are the small terms in the shake_vir here due
1703                      * to numerical errors, or are they important
1704                      * physically? I'm thinking they are just errors, but not completely sure. 
1705                      * For now, will call without actually constraining, constr=NULL*/
1706                     update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
1707                                        state,fr->bMolPBC,graph,f,
1708                                        &top->idef,tmp_vir,force_vir,
1709                                        cr,nrnb,wcycle,upd,NULL,
1710                                        bInitStep,FALSE,bCalcVir,
1711                                        state->veta);  
1712                 }
1713                 if (!bOK && !bFFscan) 
1714                 {
1715                     gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
1716                 }
1717                 
1718                 if (fr->bSepDVDL && fplog && do_log) 
1719                 {
1720                     fprintf(fplog,sepdvdlformat,"Constraint dV/dl",0.0,dvdl);
1721                 }
1722                 enerd->term[F_DVDL_BONDED] += dvdl;
1723             } 
1724             else if (graph) 
1725             {
1726                 /* Need to unshift here */
1727                 unshift_self(graph,state->box,state->x);
1728             }
1729             
1730             GMX_BARRIER(cr->mpi_comm_mygroup);
1731             GMX_MPE_LOG(ev_update_finish);
1732
1733             if (vsite != NULL) 
1734             {
1735                 wallcycle_start(wcycle,ewcVSITECONSTR);
1736                 if (graph != NULL) 
1737                 {
1738                     shift_self(graph,state->box,state->x);
1739                 }
1740                 construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
1741                                  top->idef.iparams,top->idef.il,
1742                                  fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1743                 
1744                 if (graph != NULL) 
1745                 {
1746                     unshift_self(graph,state->box,state->x);
1747                 }
1748                 wallcycle_stop(wcycle,ewcVSITECONSTR);
1749             }
1750             
1751             /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
1752             /* With Leap-Frog we can skip compute_globals at
1753              * non-communication steps, but we need to calculate
1754              * the kinetic energy one step before communication.
1755              */
1756             if (bGStat || do_per_step(step+1,nstglobalcomm) ||
1757                 EI_VV(ir->eI))
1758             {
1759                 if (ir->nstlist == -1 && bFirstIterate)
1760                 {
1761                     gs.sig[eglsNABNSB] = nlh.nabnsb;
1762                 }
1763                 compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1764                                 wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
1765                                 constr,
1766                                 bFirstIterate ? &gs : NULL, 
1767                                 (step_rel % gs.nstms == 0) && 
1768                                 (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
1769                                 lastbox,
1770                                 top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1771                                 cglo_flags 
1772                                 | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0)
1773                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1774                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
1775                                 | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) 
1776                                 | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0) 
1777                                 | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
1778                                 | CGLO_CONSTRAINT 
1779                     );
1780                 if (ir->nstlist == -1 && bFirstIterate)
1781                 {
1782                     nlh.nabnsb = gs.set[eglsNABNSB];
1783                     gs.set[eglsNABNSB] = 0;
1784                 }
1785             }
1786             /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
1787             /* #############  END CALC EKIN AND PRESSURE ################# */
1788         
1789             /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1790                the virial that should probably be addressed eventually. state->veta has better properies,
1791                but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1792                generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1793
1794             if (bIterations && 
1795                 done_iterating(cr,fplog,step,&iterate,bFirstIterate,
1796                                trace(shake_vir),&tracevir)) 
1797             {
1798                 break;
1799             }
1800             bFirstIterate = FALSE;
1801         }
1802
1803         /* only add constraint dvdl after constraints */
1804         enerd->term[F_DVDL_BONDED] += dvdl;
1805         if (!bVV)
1806         {
1807             /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
1808             sum_dhdl(enerd,state->lambda,ir->fepvals);
1809         }
1810         update_box(fplog,step,ir,mdatoms,state,graph,f,
1811                    ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
1812         
1813         /* ################# END UPDATE STEP 2 ################# */
1814         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1815     
1816         /* The coordinates (x) were unshifted in update */
1817         if (bFFscan && (shellfc==NULL || bConverged))
1818         {
1819             if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
1820                                  f,NULL,xcopy,
1821                                  &(top_global->mols),mdatoms->massT,pres))
1822             {
1823                 gmx_finalize_par();
1824
1825                 fprintf(stderr,"\n");
1826                 exit(0);
1827             }
1828         }
1829         if (!bGStat)
1830         {
1831             /* We will not sum ekinh_old,                                                            
1832              * so signal that we still have to do it.                                                
1833              */
1834             bSumEkinhOld = TRUE;
1835         }
1836         
1837         if (bTCR)
1838         {
1839             /* Only do GCT when the relaxation of shells (minimization) has converged,
1840              * otherwise we might be coupling to bogus energies. 
1841              * In parallel we must always do this, because the other sims might
1842              * update the FF.
1843              */
1844
1845             /* Since this is called with the new coordinates state->x, I assume
1846              * we want the new box state->box too. / EL 20040121
1847              */
1848             do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
1849                         ir,MASTER(cr),
1850                         mdatoms,&(top->idef),mu_aver,
1851                         top_global->mols.nr,cr,
1852                         state->box,total_vir,pres,
1853                         mu_tot,state->x,f,bConverged);
1854             debug_gmx();
1855         }
1856
1857         /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1858         
1859         /* use the directly determined last velocity, not actually the averaged half steps */
1860         if (bTrotter && ir->eI==eiVV) 
1861         {
1862             enerd->term[F_EKIN] = last_ekin;
1863         }
1864         enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1865         
1866         if (bVV)
1867         {
1868             enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1869         }
1870         else 
1871         {
1872             enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
1873         }
1874         /* Check for excessively large energies */
1875         if (bIonize) 
1876         {
1877 #ifdef GMX_DOUBLE
1878             real etot_max = 1e200;
1879 #else
1880             real etot_max = 1e30;
1881 #endif
1882             if (fabs(enerd->term[F_ETOT]) > etot_max) 
1883             {
1884                 fprintf(stderr,"Energy too large (%g), giving up\n",
1885                         enerd->term[F_ETOT]);
1886             }
1887         }
1888         /* #########  END PREPARING EDR OUTPUT  ###########  */
1889         
1890         /* Time for performance */
1891         if (((step % stepout) == 0) || bLastStep) 
1892         {
1893             runtime_upd_proc(runtime);
1894         }
1895         
1896         /* Output stuff */
1897         if (MASTER(cr))
1898         {
1899             gmx_bool do_dr,do_or;
1900             
1901             if (fplog && do_log && bDoExpanded)
1902             {
1903                 /* only needed if doing expanded ensemble */
1904                 PrintFreeEnergyInfoToFile(fplog,ir->fepvals,ir->expandedvals,ir->bSimTemp?ir->simtempvals:NULL,
1905                                           &df_history,state->fep_state,ir->nstlog,step);
1906             }
1907             if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
1908             {
1909                 if (bCalcEner)
1910                 {
1911                     upd_mdebin(mdebin,bDoDHDL, TRUE,
1912                                t,mdatoms->tmass,enerd,state,
1913                                ir->fepvals,ir->expandedvals,lastbox,
1914                                shake_vir,force_vir,total_vir,pres,
1915                                ekind,mu_tot,constr);
1916                 }
1917                 else
1918                 {
1919                     upd_mdebin_step(mdebin);
1920                 }
1921                 
1922                 do_dr  = do_per_step(step,ir->nstdisreout);
1923                 do_or  = do_per_step(step,ir->nstorireout);
1924                 
1925                 print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
1926                            step,t,
1927                            eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
1928             }
1929             if (ir->ePull != epullNO)
1930             {
1931                 pull_print_output(ir->pull,step,t);
1932             }
1933             
1934             if (do_per_step(step,ir->nstlog))
1935             {
1936                 if(fflush(fplog) != 0)
1937                 {
1938                     gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
1939                 }
1940             }
1941         }
1942         if (bDoExpanded)
1943         {
1944             /* Have to do this part after outputting the logfile and the edr file */
1945             state->fep_state = lamnew;
1946             for (i=0;i<efptNR;i++)
1947             {
1948                 state->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
1949             }
1950         }
1951         /* Remaining runtime */
1952         if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
1953         {
1954             if (shellfc) 
1955             {
1956                 fprintf(stderr,"\n");
1957             }
1958             print_time(stderr,runtime,step,ir,cr);
1959         }
1960
1961         /* Replica exchange */
1962         bExchanged = FALSE;
1963         if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
1964             do_per_step(step,repl_ex_nst)) 
1965         {
1966             bExchanged = replica_exchange(fplog,cr,repl_ex,
1967                                           state_global,enerd,
1968                                           state,step,t);
1969
1970             if (bExchanged && DOMAINDECOMP(cr)) 
1971             {
1972                 dd_partition_system(fplog,step,cr,TRUE,1,
1973                                     state_global,top_global,ir,
1974                                     state,&f,mdatoms,top,fr,
1975                                     vsite,shellfc,constr,
1976                                     nrnb,wcycle,FALSE);
1977             }
1978         }
1979         
1980         bFirstStep = FALSE;
1981         bInitStep = FALSE;
1982         bStartingFromCpt = FALSE;
1983
1984         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1985         /* With all integrators, except VV, we need to retain the pressure
1986          * at the current step for coupling at the next step.
1987          */
1988         if ((state->flags & (1<<estPRES_PREV)) &&
1989             (bGStatEveryStep ||
1990              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1991         {
1992             /* Store the pressure in t_state for pressure coupling
1993              * at the next MD step.
1994              */
1995             copy_mat(pres,state->pres_prev);
1996         }
1997         
1998         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1999
2000         if ( (membed!=NULL) && (!bLastStep) )
2001         {
2002             rescale_membed(step_rel,membed,state_global->x);
2003         }
2004
2005         if (bRerunMD) 
2006         {
2007             if (MASTER(cr))
2008             {
2009                 /* read next frame from input trajectory */
2010                 bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
2011             }
2012
2013             if (PAR(cr))
2014             {
2015                 rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
2016             }
2017         }
2018         
2019         if (!bRerunMD || !rerun_fr.bStep)
2020         {
2021             /* increase the MD step number */
2022             step++;
2023             step_rel++;
2024         }
2025         
2026         cycles = wallcycle_stop(wcycle,ewcSTEP);
2027         if (DOMAINDECOMP(cr) && wcycle)
2028         {
2029             dd_cycles_add(cr->dd,cycles,ddCyclStep);
2030         }
2031
2032         if (bPMETuneRunning || bPMETuneTry)
2033         {
2034             /* PME grid + cut-off optimization with GPUs or PME nodes */
2035
2036             /* Count the total cycles over the last steps */
2037             cycles_pmes += cycles;
2038
2039             /* We can only switch cut-off at NS steps */
2040             if (step % ir->nstlist == 0)
2041             {
2042                 /* PME grid + cut-off optimization with GPUs or PME nodes */
2043                 if (bPMETuneTry)
2044                 {
2045                     if (DDMASTER(cr->dd))
2046                     {
2047                         /* PME node load is too high, start tuning */
2048                         bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
2049                     }
2050                     dd_bcast(cr->dd,sizeof(gmx_bool),&bPMETuneRunning);
2051
2052                     if (bPMETuneRunning || step_rel > ir->nstlist*50)
2053                     {
2054                         bPMETuneTry     = FALSE;
2055                     }
2056                 }
2057                 if (bPMETuneRunning)
2058                 {
2059                     /* init_step might not be a multiple of nstlist,
2060                      * but the first cycle is always skipped anyhow.
2061                      */
2062                     bPMETuneRunning =
2063                         switch_pme(pme_switch,cr,
2064                                    (bVerbose && MASTER(cr)) ? stderr : NULL,
2065                                    fplog,
2066                                    ir,state,cycles_pmes,
2067                                    fr->ic,fr->nbv,&fr->pmedata,
2068                                    step);
2069
2070                     fr->ewaldcoeff = fr->ic->ewaldcoeff;
2071                 }
2072
2073                 cycles_pmes = 0;
2074             }
2075         }
2076         
2077         if (step_rel == wcycle_get_reset_counters(wcycle) ||
2078             gs.set[eglsRESETCOUNTERS] != 0)
2079         {
2080             /* Reset all the counters related to performance over the run */
2081             reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime,
2082                                fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
2083             wcycle_set_reset_counters(wcycle,-1);
2084             /* Correct max_hours for the elapsed time */
2085             max_hours -= run_time/(60.0*60.0);
2086             bResetCountersHalfMaxH = FALSE;
2087             gs.set[eglsRESETCOUNTERS] = 0;
2088         }
2089
2090     }
2091     /* End of main MD loop */
2092     debug_gmx();
2093     
2094     /* Stop the time */
2095     runtime_end(runtime);
2096     
2097     if (bRerunMD && MASTER(cr))
2098     {
2099         close_trj(status);
2100     }
2101     
2102     if (!(cr->duty & DUTY_PME))
2103     {
2104         /* Tell the PME only node to finish */
2105         gmx_pme_send_finish(cr);
2106     }
2107     
2108     if (MASTER(cr))
2109     {
2110         if (ir->nstcalcenergy > 0 && !bRerunMD) 
2111         {
2112             print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
2113                        eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
2114         }
2115     }
2116
2117     done_mdoutf(outf);
2118
2119     debug_gmx();
2120
2121     if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
2122     {
2123         fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
2124         fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
2125     }
2126     
2127     if (shellfc && fplog)
2128     {
2129         fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
2130                 (nconverged*100.0)/step_rel);
2131         fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
2132                 tcount/step_rel);
2133     }
2134     
2135     if (repl_ex_nst > 0 && MASTER(cr))
2136     {
2137         print_replica_exchange_statistics(fplog,repl_ex);
2138     }
2139     
2140     runtime->nsteps_done = step_rel;
2141
2142    return 0;
2143 }