Merge release-4-6 into master
[alexxy/gromacs.git] / src / programs / mdrun / md.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  * 
4  *                This source code is part of
5  * 
6  *                 G   R   O   M   A   C   S
7  * 
8  *          GROningen MAchine for Chemical Simulations
9  * 
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  * 
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  * 
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  * 
31  * For more info, check our website at http://www.gromacs.org
32  * 
33  * And Hey:
34  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include "typedefs.h"
41 #include "smalloc.h"
42 #include "sysstuff.h"
43 #include "vec.h"
44 #include "statutil.h"
45 #include "vcm.h"
46 #include "mdebin.h"
47 #include "nrnb.h"
48 #include "calcmu.h"
49 #include "index.h"
50 #include "vsite.h"
51 #include "update.h"
52 #include "ns.h"
53 #include "trnio.h"
54 #include "xtcio.h"
55 #include "mdrun.h"
56 #include "md_support.h"
57 #include "confio.h"
58 #include "network.h"
59 #include "pull.h"
60 #include "xvgr.h"
61 #include "physics.h"
62 #include "names.h"
63 #include "xmdrun.h"
64 #include "ionize.h"
65 #include "disre.h"
66 #include "orires.h"
67 #include "pme.h"
68 #include "mdatoms.h"
69 #include "repl_ex.h"
70 #include "qmmm.h"
71 #include "domdec.h"
72 #include "domdec_network.h"
73 #include "partdec.h"
74 #include "topsort.h"
75 #include "coulomb.h"
76 #include "constr.h"
77 #include "shellfc.h"
78 #include "compute_io.h"
79 #include "mvdata.h"
80 #include "checkpoint.h"
81 #include "mtop_util.h"
82 #include "sighandler.h"
83 #include "txtdump.h"
84 #include "string2.h"
85 #include "pme_loadbal.h"
86 #include "bondf.h"
87 #include "membed.h"
88 #include "types/nlistheuristics.h"
89 #include "types/iteratedconstraints.h"
90 #include "nbnxn_cuda_data_mgmt.h"
91
92 #ifdef GMX_LIB_MPI
93 #include <mpi.h>
94 #endif
95 #ifdef GMX_THREAD_MPI
96 #include "tmpi.h"
97 #endif
98
99 #ifdef GMX_FAHCORE
100 #include "corewrap.h"
101 #endif
102
103 static void reset_all_counters(FILE *fplog,t_commrec *cr,
104                                gmx_large_int_t step,
105                                gmx_large_int_t *step_rel,t_inputrec *ir,
106                                gmx_wallcycle_t wcycle,t_nrnb *nrnb,
107                                gmx_runtime_t *runtime,
108                                nbnxn_cuda_ptr_t cu_nbv)
109 {
110     char sbuf[STEPSTRSIZE];
111
112     /* Reset all the counters related to performance over the run */
113     md_print_warn(cr,fplog,"step %s: resetting all time and cycle counters\n",
114                   gmx_step_str(step,sbuf));
115
116     if (cu_nbv)
117     {
118         nbnxn_cuda_reset_timings(cu_nbv);
119     }
120
121     wallcycle_stop(wcycle,ewcRUN);
122     wallcycle_reset_all(wcycle);
123     if (DOMAINDECOMP(cr))
124     {
125         reset_dd_statistics_counters(cr->dd);
126     }
127     init_nrnb(nrnb);
128     ir->init_step += *step_rel;
129     ir->nsteps    -= *step_rel;
130     *step_rel = 0;
131     wallcycle_start(wcycle,ewcRUN);
132     runtime_start(runtime);
133     print_date_and_time(fplog,cr->nodeid,"Restarted time",runtime);
134 }
135
136 double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
137              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
138              int nstglobalcomm,
139              gmx_vsite_t *vsite,gmx_constr_t constr,
140              int stepout,t_inputrec *ir,
141              gmx_mtop_t *top_global,
142              t_fcdata *fcd,
143              t_state *state_global,
144              t_mdatoms *mdatoms,
145              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
146              gmx_edsam_t ed,t_forcerec *fr,
147              int repl_ex_nst,int repl_ex_nex,int repl_ex_seed,gmx_membed_t membed,
148              real cpt_period,real max_hours,
149              const char *deviceOptions,
150              unsigned long Flags,
151              gmx_runtime_t *runtime)
152 {
153     gmx_mdoutf_t *outf;
154     gmx_large_int_t step,step_rel;
155     double     run_time;
156     double     t,t0,lam0[efptNR];
157     gmx_bool       bGStatEveryStep,bGStat,bCalcVir,bCalcEner;
158     gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
159                bFirstStep,bStateFromCP,bStateFromTPX,bInitStep,bLastStep,
160                bBornRadii,bStartingFromCpt;
161     gmx_bool   bDoDHDL=FALSE,bDoFEP=FALSE,bDoExpanded=FALSE;
162     gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
163                bForceUpdate=FALSE,bCPT;
164     int        mdof_flags;
165     gmx_bool       bMasterState;
166     int        force_flags,cglo_flags;
167     tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
168     int        i,m;
169     t_trxstatus *status;
170     rvec       mu_tot;
171     t_vcm      *vcm;
172     t_state    *bufstate=NULL;   
173     matrix     *scale_tot,pcoupl_mu,M,ebox;
174     gmx_nlheur_t nlh;
175     t_trxframe rerun_fr;
176     gmx_repl_ex_t repl_ex=NULL;
177     int        nchkpt=1;
178     gmx_localtop_t *top;        
179     t_mdebin *mdebin=NULL;
180     df_history_t df_history;
181     t_state    *state=NULL;
182     rvec       *f_global=NULL;
183     int        n_xtc=-1;
184     rvec       *x_xtc=NULL;
185     gmx_enerdata_t *enerd;
186     rvec       *f=NULL;
187     gmx_global_stat_t gstat;
188     gmx_update_t upd=NULL;
189     t_graph    *graph=NULL;
190     globsig_t   gs;
191     gmx_rng_t mcrng=NULL;
192     gmx_bool        bFFscan;
193     gmx_groups_t *groups;
194     gmx_ekindata_t *ekind, *ekind_save;
195     gmx_shellfc_t shellfc;
196     int         count,nconverged=0;
197     real        timestep=0;
198     double      tcount=0;
199     gmx_bool        bIonize=FALSE;
200     gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
201     gmx_bool        bAppend;
202     gmx_bool        bResetCountersHalfMaxH=FALSE;
203     gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
204     gmx_bool        bUpdateDoLR;
205     real        mu_aver=0,dvdl;
206     int         a0,a1,gnx=0,ii;
207     atom_id     *grpindex=NULL;
208     char        *grpname;
209     t_coupl_rec *tcr=NULL;
210     rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
211     matrix      boxcopy={{0}},lastbox;
212         tensor      tmpvir;
213         real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
214         real        vetanew = 0;
215     int         lamnew=0;
216     /* for FEP */
217     int         fep_state=0;
218     int         nstfep;
219     real        rate;
220     double      cycles;
221         real        saved_conserved_quantity = 0;
222     real        last_ekin = 0;
223         int         iter_i;
224         t_extmass   MassQ;
225     int         **trotter_seq; 
226     char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
227     int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
228     gmx_iterate_t iterate;
229     gmx_large_int_t multisim_nsteps=-1; /* number of steps to do  before first multisim 
230                                           simulation stops. If equal to zero, don't
231                                           communicate any more between multisims.*/
232     /* PME load balancing data for GPU kernels */
233     pme_load_balancing_t pme_loadbal=NULL;
234     double          cycles_pmes;
235     gmx_bool        bPMETuneTry=FALSE,bPMETuneRunning=FALSE;
236
237     if(MASTER(cr))
238     {
239         gmx_warning("New C kernels (and force-only) kernels are now enabled,\n"
240                     "but it will be another couple of days for SSE/AVX kernels.\n\n");
241     }
242
243 #ifdef GMX_FAHCORE
244     /* Temporary addition for FAHCORE checkpointing */
245     int chkpt_ret;
246 #endif
247     
248     /* Check for special mdrun options */
249     bRerunMD = (Flags & MD_RERUN);
250     bIonize  = (Flags & MD_IONIZE);
251     bFFscan  = (Flags & MD_FFSCAN);
252     bAppend  = (Flags & MD_APPENDFILES);
253     if (Flags & MD_RESETCOUNTERSHALFWAY)
254     {
255         if (ir->nsteps > 0)
256         {
257             /* Signal to reset the counters half the simulation steps. */
258             wcycle_set_reset_counters(wcycle,ir->nsteps/2);
259         }
260         /* Signal to reset the counters halfway the simulation time. */
261         bResetCountersHalfMaxH = (max_hours > 0);
262     }
263
264     /* md-vv uses averaged full step velocities for T-control 
265        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
266        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
267     bVV = EI_VV(ir->eI);
268     if (bVV) /* to store the initial velocities while computing virial */
269     {
270         snew(cbuf,top_global->natoms);
271     }
272     /* all the iteratative cases - only if there are constraints */ 
273     bIterations = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
274     bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
275     
276     if (bRerunMD)
277     {
278         /* Since we don't know if the frames read are related in any way,
279          * rebuild the neighborlist at every step.
280          */
281         ir->nstlist       = 1;
282         ir->nstcalcenergy = 1;
283         nstglobalcomm     = 1;
284     }
285
286     check_ir_old_tpx_versions(cr,fplog,ir,top_global);
287
288     nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
289     bGStatEveryStep = (nstglobalcomm == 1);
290
291     if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
292     {
293         fprintf(fplog,
294                 "To reduce the energy communication with nstlist = -1\n"
295                 "the neighbor list validity should not be checked at every step,\n"
296                 "this means that exact integration is not guaranteed.\n"
297                 "The neighbor list validity is checked after:\n"
298                 "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
299                 "In most cases this will result in exact integration.\n"
300                 "This reduces the energy communication by a factor of 2 to 3.\n"
301                 "If you want less energy communication, set nstlist > 3.\n\n");
302     }
303
304     if (bRerunMD || bFFscan)
305     {
306         ir->nstxtcout = 0;
307     }
308     groups = &top_global->groups;
309
310     /* Initial values */
311     init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
312             &(state_global->fep_state),lam0,
313             nrnb,top_global,&upd,
314             nfile,fnm,&outf,&mdebin,
315             force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
316
317     clear_mat(total_vir);
318     clear_mat(pres);
319     /* Energy terms and groups */
320     snew(enerd,1);
321     init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
322                   enerd);
323     if (DOMAINDECOMP(cr))
324     {
325         f = NULL;
326     }
327     else
328     {
329         snew(f,top_global->natoms);
330     }
331
332     /* lambda Monte carlo random number generator  */
333     if (ir->bExpanded)
334     {
335         mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
336     }
337     /* copy the state into df_history */
338     copy_df_history(&df_history,&state_global->dfhist);
339
340     /* Kinetic energy data */
341     snew(ekind,1);
342     init_ekindata(fplog,top_global,&(ir->opts),ekind);
343     /* needed for iteration of constraints */
344     snew(ekind_save,1);
345     init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
346     /* Copy the cos acceleration to the groups struct */    
347     ekind->cosacc.cos_accel = ir->cos_accel;
348
349     gstat = global_stat_init(ir);
350     debug_gmx();
351
352     /* Check for polarizable models and flexible constraints */
353     shellfc = init_shell_flexcon(fplog,
354                                  top_global,n_flexible_constraints(constr),
355                                  (ir->bContinuation || 
356                                   (DOMAINDECOMP(cr) && !MASTER(cr))) ?
357                                  NULL : state_global->x);
358
359     if (DEFORM(*ir))
360     {
361 #ifdef GMX_THREAD_MPI
362         tMPI_Thread_mutex_lock(&deform_init_box_mutex);
363 #endif
364         set_deform_reference_box(upd,
365                                  deform_init_init_step_tpx,
366                                  deform_init_box_tpx);
367 #ifdef GMX_THREAD_MPI
368         tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
369 #endif
370     }
371
372     {
373         double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
374         if ((io > 2000) && MASTER(cr))
375             fprintf(stderr,
376                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
377                     io);
378     }
379
380     if (DOMAINDECOMP(cr)) {
381         top = dd_init_local_top(top_global);
382
383         snew(state,1);
384         dd_init_local_state(cr->dd,state_global,state);
385
386         if (DDMASTER(cr->dd) && ir->nstfout) {
387             snew(f_global,state_global->natoms);
388         }
389     } else {
390         if (PAR(cr)) {
391             /* Initialize the particle decomposition and split the topology */
392             top = split_system(fplog,top_global,ir,cr);
393
394             pd_cg_range(cr,&fr->cg0,&fr->hcg);
395             pd_at_range(cr,&a0,&a1);
396         } else {
397             top = gmx_mtop_generate_local_top(top_global,ir);
398
399             a0 = 0;
400             a1 = top_global->natoms;
401         }
402
403         forcerec_set_excl_load(fr,top,cr);
404
405         state = partdec_init_local_state(cr,state_global);
406         f_global = f;
407
408         atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
409
410         if (vsite) {
411             set_vsite_top(vsite,top,mdatoms,cr);
412         }
413
414         if (ir->ePBC != epbcNONE && !fr->bMolPBC) {
415             graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
416         }
417
418         if (shellfc) {
419             make_local_shells(cr,mdatoms,shellfc);
420         }
421
422         init_bonded_thread_force_reduction(fr,&top->idef);
423
424         if (ir->pull && PAR(cr)) {
425             dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
426         }
427     }
428
429     if (DOMAINDECOMP(cr))
430     {
431         /* Distribute the charge groups over the nodes from the master node */
432         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
433                             state_global,top_global,ir,
434                             state,&f,mdatoms,top,fr,
435                             vsite,shellfc,constr,
436                             nrnb,wcycle,FALSE);
437
438     }
439
440     update_mdatoms(mdatoms,state->lambda[efptMASS]);
441
442     if (opt2bSet("-cpi",nfile,fnm))
443     {
444         bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr);
445     }
446     else
447     {
448         bStateFromCP = FALSE;
449     }
450
451     if (MASTER(cr))
452     {
453         if (bStateFromCP)
454         {
455             /* Update mdebin with energy history if appending to output files */
456             if ( Flags & MD_APPENDFILES )
457             {
458                 restore_energyhistory_from_state(mdebin,&state_global->enerhist);
459             }
460             else
461             {
462                 /* We might have read an energy history from checkpoint,
463                  * free the allocated memory and reset the counts.
464                  */
465                 done_energyhistory(&state_global->enerhist);
466                 init_energyhistory(&state_global->enerhist);
467             }
468         }
469         /* Set the initial energy history in state by updating once */
470         update_energyhistory(&state_global->enerhist,mdebin);
471     }   
472
473     if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) 
474     {
475         /* Set the random state if we read a checkpoint file */
476         set_stochd_state(upd,state);
477     }
478
479     if (state->flags & (1<<estMC_RNG))
480     {
481         set_mc_state(mcrng,state);
482     }
483
484     /* Initialize constraints */
485     if (constr) {
486         if (!DOMAINDECOMP(cr))
487             set_constraints(constr,top,ir,mdatoms,cr);
488     }
489
490     /* Check whether we have to GCT stuff */
491     bTCR = ftp2bSet(efGCT,nfile,fnm);
492     if (bTCR) {
493         if (MASTER(cr)) {
494             fprintf(stderr,"Will do General Coupling Theory!\n");
495         }
496         gnx = top_global->mols.nr;
497         snew(grpindex,gnx);
498         for(i=0; (i<gnx); i++) {
499             grpindex[i] = i;
500         }
501     }
502
503     if (repl_ex_nst > 0)
504     {
505         /* We need to be sure replica exchange can only occur
506          * when the energies are current */
507         check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
508                         "repl_ex_nst",&repl_ex_nst);
509         /* This check needs to happen before inter-simulation
510          * signals are initialized, too */
511     }
512     if (repl_ex_nst > 0 && MASTER(cr))
513     {
514         repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
515                                         repl_ex_nst,repl_ex_nex,repl_ex_seed);
516     }
517
518     /* PME tuning is only supported with GPUs or PME nodes and not with rerun */
519     if ((Flags & MD_TUNEPME) &&
520         EEL_PME(fr->eeltype) &&
521         ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
522         !bRerunMD)
523     {
524         pme_loadbal_init(&pme_loadbal,ir,state->box,fr->ic,fr->pmedata);
525         cycles_pmes = 0;
526         if (cr->duty & DUTY_PME)
527         {
528             /* Start tuning right away, as we can't measure the load */
529             bPMETuneRunning = TRUE;
530         }
531         else
532         {
533             /* Separate PME nodes, we can measure the PP/PME load balance */
534             bPMETuneTry = TRUE;
535         }
536     }
537
538     if (!ir->bContinuation && !bRerunMD)
539     {
540         if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
541         {
542             /* Set the velocities of frozen particles to zero */
543             for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
544             {
545                 for(m=0; m<DIM; m++)
546                 {
547                     if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
548                     {
549                         state->v[i][m] = 0;
550                     }
551                 }
552             }
553         }
554
555         if (constr)
556         {
557             /* Constrain the initial coordinates and velocities */
558             do_constrain_first(fplog,constr,ir,mdatoms,state,f,
559                                graph,cr,nrnb,fr,top,shake_vir);
560         }
561         if (vsite)
562         {
563             /* Construct the virtual sites for the initial configuration */
564             construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
565                              top->idef.iparams,top->idef.il,
566                              fr->ePBC,fr->bMolPBC,graph,cr,state->box);
567         }
568     }
569
570     debug_gmx();
571   
572     /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and nstrepl_ex_nst*/
573     nstfep = ir->fepvals->nstdhdl;
574     if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
575     {
576         nstfep = ir->expandedvals->nstexpanded;
577     }
578     if (repl_ex_nst > 0 && repl_ex_nst > nstfep)
579     {
580         nstfep = repl_ex_nst;
581     }
582
583     /* I'm assuming we need global communication the first time! MRS */
584     cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
585                   | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM:0)
586                   | (bVV ? CGLO_PRESSURE:0)
587                   | (bVV ? CGLO_CONSTRAINT:0)
588                   | (bRerunMD ? CGLO_RERUNMD:0)
589                   | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
590     
591     bSumEkinhOld = FALSE;
592     compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
593                     NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
594                     constr,NULL,FALSE,state->box,
595                     top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
596     if (ir->eI == eiVVAK) {
597         /* a second call to get the half step temperature initialized as well */ 
598         /* we do the same call as above, but turn the pressure off -- internally to 
599            compute_globals, this is recognized as a velocity verlet half-step 
600            kinetic energy calculation.  This minimized excess variables, but 
601            perhaps loses some logic?*/
602         
603         compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
604                         NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
605                         constr,NULL,FALSE,state->box,
606                         top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
607                         cglo_flags &~ (CGLO_STOPCM | CGLO_PRESSURE));
608     }
609     
610     /* Calculate the initial half step temperature, and save the ekinh_old */
611     if (!(Flags & MD_STARTFROMCPT)) 
612     {
613         for(i=0; (i<ir->opts.ngtc); i++) 
614         {
615             copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
616         } 
617     }
618     if (ir->eI != eiVV) 
619     {
620         enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
621                                      and there is no previous step */
622     }
623     
624     /* if using an iterative algorithm, we need to create a working directory for the state. */
625     if (bIterations) 
626     {
627             bufstate = init_bufstate(state);
628     }
629     if (bFFscan) 
630     {
631         snew(xcopy,state->natoms);
632         snew(vcopy,state->natoms);
633         copy_rvecn(state->x,xcopy,0,state->natoms);
634         copy_rvecn(state->v,vcopy,0,state->natoms);
635         copy_mat(state->box,boxcopy);
636     } 
637     
638     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
639        temperature control */
640     trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
641     
642     if (MASTER(cr))
643     {
644         if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
645         {
646             fprintf(fplog,
647                     "RMS relative constraint deviation after constraining: %.2e\n",
648                     constr_rmsd(constr,FALSE));
649         }
650         if (EI_STATE_VELOCITY(ir->eI))
651         {
652             fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
653         }
654         if (bRerunMD)
655         {
656             fprintf(stderr,"starting md rerun '%s', reading coordinates from"
657                     " input trajectory '%s'\n\n",
658                     *(top_global->name),opt2fn("-rerun",nfile,fnm));
659             if (bVerbose)
660             {
661                 fprintf(stderr,"Calculated time to finish depends on nsteps from "
662                         "run input file,\nwhich may not correspond to the time "
663                         "needed to process input trajectory.\n\n");
664             }
665         }
666         else
667         {
668             char tbuf[20];
669             fprintf(stderr,"starting mdrun '%s'\n",
670                     *(top_global->name));
671             if (ir->nsteps >= 0)
672             {
673                 sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
674             }
675             else
676             {
677                 sprintf(tbuf,"%s","infinite");
678             }
679             if (ir->init_step > 0)
680             {
681                 fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
682                         gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
683                         gmx_step_str(ir->init_step,sbuf2),
684                         ir->init_step*ir->delta_t);
685             }
686             else
687             {
688                 fprintf(stderr,"%s steps, %s ps.\n",
689                         gmx_step_str(ir->nsteps,sbuf),tbuf);
690             }
691         }
692         fprintf(fplog,"\n");
693     }
694
695     /* Set and write start time */
696     runtime_start(runtime);
697     print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
698     wallcycle_start(wcycle,ewcRUN);
699     if (fplog)
700     {
701         fprintf(fplog,"\n");
702     }
703
704     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
705 #ifdef GMX_FAHCORE
706     chkpt_ret=fcCheckPointParallel( cr->nodeid,
707                                     NULL,0);
708     if ( chkpt_ret == 0 ) 
709         gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
710 #endif
711
712     debug_gmx();
713     /***********************************************************
714      *
715      *             Loop over MD steps 
716      *
717      ************************************************************/
718
719     /* if rerunMD then read coordinates and velocities from input trajectory */
720     if (bRerunMD)
721     {
722         if (getenv("GMX_FORCE_UPDATE"))
723         {
724             bForceUpdate = TRUE;
725         }
726
727         rerun_fr.natoms = 0;
728         if (MASTER(cr))
729         {
730             bNotLastFrame = read_first_frame(oenv,&status,
731                                              opt2fn("-rerun",nfile,fnm),
732                                              &rerun_fr,TRX_NEED_X | TRX_READ_V);
733             if (rerun_fr.natoms != top_global->natoms)
734             {
735                 gmx_fatal(FARGS,
736                           "Number of atoms in trajectory (%d) does not match the "
737                           "run input file (%d)\n",
738                           rerun_fr.natoms,top_global->natoms);
739             }
740             if (ir->ePBC != epbcNONE)
741             {
742                 if (!rerun_fr.bBox)
743                 {
744                     gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
745                 }
746                 if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
747                 {
748                     gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
749                 }
750             }
751         }
752
753         if (PAR(cr))
754         {
755             rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
756         }
757
758         if (ir->ePBC != epbcNONE)
759         {
760             /* Set the shift vectors.
761              * Necessary here when have a static box different from the tpr box.
762              */
763             calc_shifts(rerun_fr.box,fr->shift_vec);
764         }
765     }
766
767     /* loop over MD steps or if rerunMD to end of input trajectory */
768     bFirstStep = TRUE;
769     /* Skip the first Nose-Hoover integration when we get the state from tpx */
770     bStateFromTPX = !bStateFromCP;
771     bInitStep = bFirstStep && (bStateFromTPX || bVV);
772     bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
773     bLastStep    = FALSE;
774     bSumEkinhOld = FALSE;
775     bExchanged   = FALSE;
776
777     init_global_signals(&gs,cr,ir,repl_ex_nst);
778
779     step = ir->init_step;
780     step_rel = 0;
781
782     if (ir->nstlist == -1)
783     {
784         init_nlistheuristics(&nlh,bGStatEveryStep,step);
785     }
786
787     if (MULTISIM(cr) && (repl_ex_nst <=0 ))
788     {
789         /* check how many steps are left in other sims */
790         multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
791     }
792
793
794     /* and stop now if we should */
795     bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
796                  ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
797     while (!bLastStep || (bRerunMD && bNotLastFrame)) {
798
799         wallcycle_start(wcycle,ewcSTEP);
800
801         if (bRerunMD) {
802             if (rerun_fr.bStep) {
803                 step = rerun_fr.step;
804                 step_rel = step - ir->init_step;
805             }
806             if (rerun_fr.bTime) {
807                 t = rerun_fr.time;
808             }
809             else
810             {
811                 t = step;
812             }
813         } 
814         else 
815         {
816             bLastStep = (step_rel == ir->nsteps);
817             t = t0 + step*ir->delta_t;
818         }
819
820         if (ir->efep != efepNO || ir->bSimTemp)
821             {
822             /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
823                requiring different logic. */
824             
825             set_current_lambdas(step,ir->fepvals,bRerunMD,&rerun_fr,state_global,state,lam0);
826             bDoDHDL = do_per_step(step,ir->fepvals->nstdhdl);
827             bDoFEP  = (do_per_step(step,nstfep) && (ir->efep != efepNO));
828             bDoExpanded  = (do_per_step(step,ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
829         }
830
831         if (bSimAnn) 
832         {
833             update_annealing_target_temp(&(ir->opts),t);
834         }
835
836         if (bRerunMD)
837         {
838             if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
839             {
840                 for(i=0; i<state_global->natoms; i++)
841                 {
842                     copy_rvec(rerun_fr.x[i],state_global->x[i]);
843                 }
844                 if (rerun_fr.bV)
845                 {
846                     for(i=0; i<state_global->natoms; i++)
847                     {
848                         copy_rvec(rerun_fr.v[i],state_global->v[i]);
849                     }
850                 }
851                 else
852                 {
853                     for(i=0; i<state_global->natoms; i++)
854                     {
855                         clear_rvec(state_global->v[i]);
856                     }
857                     if (bRerunWarnNoV)
858                     {
859                         fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
860                                 "         Ekin, temperature and pressure are incorrect,\n"
861                                 "         the virial will be incorrect when constraints are present.\n"
862                                 "\n");
863                         bRerunWarnNoV = FALSE;
864                     }
865                 }
866             }
867             copy_mat(rerun_fr.box,state_global->box);
868             copy_mat(state_global->box,state->box);
869
870             if (vsite && (Flags & MD_RERUN_VSITE))
871             {
872                 if (DOMAINDECOMP(cr))
873                 {
874                     gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
875                 }
876                 if (graph)
877                 {
878                     /* Following is necessary because the graph may get out of sync
879                      * with the coordinates if we only have every N'th coordinate set
880                      */
881                     mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
882                     shift_self(graph,state->box,state->x);
883                 }
884                 construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
885                                  top->idef.iparams,top->idef.il,
886                                  fr->ePBC,fr->bMolPBC,graph,cr,state->box);
887                 if (graph)
888                 {
889                     unshift_self(graph,state->box,state->x);
890                 }
891             }
892         }
893
894         /* Stop Center of Mass motion */
895         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
896
897         /* Copy back starting coordinates in case we're doing a forcefield scan */
898         if (bFFscan)
899         {
900             for(ii=0; (ii<state->natoms); ii++)
901             {
902                 copy_rvec(xcopy[ii],state->x[ii]);
903                 copy_rvec(vcopy[ii],state->v[ii]);
904             }
905             copy_mat(boxcopy,state->box);
906         }
907
908         if (bRerunMD)
909         {
910             /* for rerun MD always do Neighbour Searching */
911             bNS = (bFirstStep || ir->nstlist != 0);
912             bNStList = bNS;
913         }
914         else
915         {
916             /* Determine whether or not to do Neighbour Searching and LR */
917             bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
918             
919             bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
920                    (ir->nstlist == -1 && nlh.nabnsb > 0));
921
922             if (bNS && ir->nstlist == -1)
923             {
924                 set_nlistheuristics(&nlh,bFirstStep || bExchanged || bDoFEP, step);
925             }
926         } 
927
928         /* check whether we should stop because another simulation has 
929            stopped. */
930         if (MULTISIM(cr))
931         {
932             if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
933                  (multisim_nsteps != ir->nsteps) )  
934             {
935                 if (bNS)
936                 {
937                     if (MASTER(cr))
938                     {
939                         fprintf(stderr, 
940                                 "Stopping simulation %d because another one has finished\n",
941                                 cr->ms->sim);
942                     }
943                     bLastStep=TRUE;
944                     gs.sig[eglsCHKPT] = 1;
945                 }
946             }
947         }
948
949         /* < 0 means stop at next step, > 0 means stop at next NS step */
950         if ( (gs.set[eglsSTOPCOND] < 0 ) ||
951              ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
952         {
953             bLastStep = TRUE;
954         }
955
956         /* Determine whether or not to update the Born radii if doing GB */
957         bBornRadii=bFirstStep;
958         if (ir->implicit_solvent && (step % ir->nstgbradii==0))
959         {
960             bBornRadii=TRUE;
961         }
962         
963         do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
964         do_verbose = bVerbose &&
965                   (step % stepout == 0 || bFirstStep || bLastStep);
966
967         if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
968         {
969             if (bRerunMD)
970             {
971                 bMasterState = TRUE;
972             }
973             else
974             {
975                 bMasterState = FALSE;
976                 /* Correct the new box if it is too skewed */
977                 if (DYNAMIC_BOX(*ir))
978                 {
979                     if (correct_box(fplog,step,state->box,graph))
980                     {
981                         bMasterState = TRUE;
982                     }
983                 }
984                 if (DOMAINDECOMP(cr) && bMasterState)
985                 {
986                     dd_collect_state(cr->dd,state,state_global);
987                 }
988             }
989
990             if (DOMAINDECOMP(cr))
991             {
992                 /* Repartition the domain decomposition */
993                 wallcycle_start(wcycle,ewcDOMDEC);
994                 dd_partition_system(fplog,step,cr,
995                                     bMasterState,nstglobalcomm,
996                                     state_global,top_global,ir,
997                                     state,&f,mdatoms,top,fr,
998                                     vsite,shellfc,constr,
999                                     nrnb,wcycle,
1000                                     do_verbose && !bPMETuneRunning);
1001                 wallcycle_stop(wcycle,ewcDOMDEC);
1002                 /* If using an iterative integrator, reallocate space to match the decomposition */
1003             }
1004         }
1005
1006         if (MASTER(cr) && do_log && !bFFscan)
1007         {
1008             print_ebin_header(fplog,step,t,state->lambda[efptFEP]); /* can we improve the information printed here? */
1009         }
1010
1011         if (ir->efep != efepNO)
1012         {
1013             update_mdatoms(mdatoms,state->lambda[efptMASS]);
1014         }
1015
1016         if ((bRerunMD && rerun_fr.bV) || bExchanged)
1017         {
1018             
1019             /* We need the kinetic energy at minus the half step for determining
1020              * the full step kinetic energy and possibly for T-coupling.*/
1021             /* This may not be quite working correctly yet . . . . */
1022             compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1023                             wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
1024                             constr,NULL,FALSE,state->box,
1025                             top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1026                             CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
1027         }
1028         clear_mat(force_vir);
1029         
1030         /* Ionize the atoms if necessary */
1031         if (bIonize)
1032         {
1033             ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
1034                    mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
1035         }
1036         
1037         /* Update force field in ffscan program */
1038         if (bFFscan)
1039         {
1040             if (update_forcefield(fplog,
1041                                   nfile,fnm,fr,
1042                                   mdatoms->nr,state->x,state->box))
1043             {
1044                 gmx_finalize_par();
1045
1046                 exit(0);
1047             }
1048         }
1049
1050         /* We write a checkpoint at this MD step when:
1051          * either at an NS step when we signalled through gs,
1052          * or at the last step (but not when we do not want confout),
1053          * but never at the first step or with rerun.
1054          */
1055         bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
1056                  (bLastStep && (Flags & MD_CONFOUT))) &&
1057                 step > ir->init_step && !bRerunMD);
1058         if (bCPT)
1059         {
1060             gs.set[eglsCHKPT] = 0;
1061         }
1062
1063         /* Determine the energy and pressure:
1064          * at nstcalcenergy steps and at energy output steps (set below).
1065          */
1066         if (EI_VV(ir->eI) && (!bInitStep))
1067         {
1068             /* for vv, the first half actually corresponds to the last step */
1069             bCalcEner = do_per_step(step-1,ir->nstcalcenergy);
1070         }
1071         else
1072         {
1073             bCalcEner = do_per_step(step,ir->nstcalcenergy);
1074         }
1075         bCalcVir = bCalcEner ||
1076             (ir->epc != epcNO && do_per_step(step,ir->nstpcouple));
1077
1078         /* Do we need global communication ? */
1079         bGStat = (bCalcVir || bCalcEner || bStopCM ||
1080                   do_per_step(step,nstglobalcomm) ||
1081                   (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
1082
1083         do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
1084
1085         if (do_ene || do_log)
1086         {
1087             bCalcVir  = TRUE;
1088             bCalcEner = TRUE;
1089             bGStat    = TRUE;
1090         }
1091         
1092         /* these CGLO_ options remain the same throughout the iteration */
1093         cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
1094                       (bGStat ? CGLO_GSTAT : 0)
1095             );
1096         
1097         force_flags = (GMX_FORCE_STATECHANGED |
1098                        ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
1099                        GMX_FORCE_ALLFORCES |
1100                        GMX_FORCE_SEPLRF |
1101                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
1102                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
1103                        (bDoFEP ? GMX_FORCE_DHDL : 0)
1104             );
1105
1106         if(fr->bTwinRange)
1107         {
1108             if(do_per_step(step,ir->nstcalclr))
1109             {
1110                 force_flags |= GMX_FORCE_DO_LR;
1111             }
1112         }
1113         
1114         if (shellfc)
1115         {
1116             /* Now is the time to relax the shells */
1117             count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
1118                                       ir,bNS,force_flags,
1119                                       bStopCM,top,top_global,
1120                                       constr,enerd,fcd,
1121                                       state,f,force_vir,mdatoms,
1122                                       nrnb,wcycle,graph,groups,
1123                                       shellfc,fr,bBornRadii,t,mu_tot,
1124                                       state->natoms,&bConverged,vsite,
1125                                       outf->fp_field);
1126             tcount+=count;
1127
1128             if (bConverged)
1129             {
1130                 nconverged++;
1131             }
1132         }
1133         else
1134         {
1135             /* The coordinates (x) are shifted (to get whole molecules)
1136              * in do_force.
1137              * This is parallellized as well, and does communication too. 
1138              * Check comments in sim_util.c
1139              */
1140              do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
1141                      state->box,state->x,&state->hist,
1142                      f,force_vir,mdatoms,enerd,fcd,
1143                      state->lambda,graph,
1144                      fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
1145                      (bNS ? GMX_FORCE_NS : 0) | force_flags);
1146         }
1147         
1148         if (bTCR)
1149         {
1150             mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
1151                                    mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
1152         }
1153         
1154         if (bTCR && bFirstStep)
1155         {
1156             tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
1157             fprintf(fplog,"Done init_coupling\n"); 
1158             fflush(fplog);
1159         }
1160         
1161         if (bVV && !bStartingFromCpt && !bRerunMD)
1162         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
1163         {
1164             if (ir->eI==eiVV && bInitStep) 
1165             {
1166                 /* if using velocity verlet with full time step Ekin,
1167                  * take the first half step only to compute the 
1168                  * virial for the first step. From there,
1169                  * revert back to the initial coordinates
1170                  * so that the input is actually the initial step.
1171                  */
1172                 copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
1173             } else {
1174                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
1175                 trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
1176             }
1177
1178             /* If we are using twin-range interactions where the long-range component
1179              * is only evaluated every nstcalclr>1 steps, we should do a special update
1180              * step to combine the long-range forces on these steps.
1181              * For nstcalclr=1 this is not done, since the forces would have been added
1182              * directly to the short-range forces already.
1183              */
1184             bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
1185             
1186             update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,
1187                           f,bUpdateDoLR,fr->f_twin,fcd,
1188                           ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
1189                           cr,nrnb,constr,&top->idef);
1190             
1191             if (bIterations)
1192             {
1193                 gmx_iterate_init(&iterate,bIterations && !bInitStep);
1194             }
1195             /* for iterations, we save these vectors, as we will be self-consistently iterating
1196                the calculations */
1197
1198             /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
1199             
1200             /* save the state */
1201             if (bIterations && iterate.bIterate) { 
1202                 copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
1203             }
1204             
1205             bFirstIterate = TRUE;
1206             while (bFirstIterate || (bIterations && iterate.bIterate))
1207             {
1208                 if (bIterations && iterate.bIterate) 
1209                 {
1210                     copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
1211                     if (bFirstIterate && bTrotter) 
1212                     {
1213                         /* The first time through, we need a decent first estimate
1214                            of veta(t+dt) to compute the constraints.  Do
1215                            this by computing the box volume part of the
1216                            trotter integration at this time. Nothing else
1217                            should be changed by this routine here.  If
1218                            !(first time), we start with the previous value
1219                            of veta.  */
1220                         
1221                         veta_save = state->veta;
1222                         trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
1223                         vetanew = state->veta;
1224                         state->veta = veta_save;
1225                     } 
1226                 } 
1227                 
1228                 bOK = TRUE;
1229                 if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
1230                     dvdl = 0;
1231                     
1232                     update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
1233                                        state,fr->bMolPBC,graph,f,
1234                                        &top->idef,shake_vir,NULL,
1235                                        cr,nrnb,wcycle,upd,constr,
1236                                        bInitStep,TRUE,bCalcVir,vetanew);
1237                     
1238                     if (!bOK && !bFFscan)
1239                     {
1240                         gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
1241                     }
1242                     
1243                 } 
1244                 else if (graph)
1245                 { /* Need to unshift here if a do_force has been
1246                      called in the previous step */
1247                     unshift_self(graph,state->box,state->x);
1248                 }
1249                 
1250                 
1251                 /* if VV, compute the pressure and constraints */
1252                 /* For VV2, we strictly only need this if using pressure
1253                  * control, but we really would like to have accurate pressures
1254                  * printed out.
1255                  * Think about ways around this in the future?
1256                  * For now, keep this choice in comments.
1257                  */
1258                 /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
1259                     /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
1260                 bPres = TRUE;
1261                 bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
1262                 if (bCalcEner && ir->eI==eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
1263                 {
1264                     bSumEkinhOld = TRUE;
1265                 }
1266                 compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1267                                 wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
1268                                 constr,NULL,FALSE,state->box,
1269                                 top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1270                                 cglo_flags 
1271                                 | CGLO_ENERGY 
1272                                 | (bStopCM ? CGLO_STOPCM : 0)
1273                                 | (bTemp ? CGLO_TEMPERATURE:0) 
1274                                 | (bPres ? CGLO_PRESSURE : 0) 
1275                                 | (bPres ? CGLO_CONSTRAINT : 0)
1276                                 | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)  
1277                                 | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
1278                                 | CGLO_SCALEEKIN 
1279                     );
1280                 /* explanation of above: 
1281                    a) We compute Ekin at the full time step
1282                    if 1) we are using the AveVel Ekin, and it's not the
1283                    initial step, or 2) if we are using AveEkin, but need the full
1284                    time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1285                    b) If we are using EkinAveEkin for the kinetic energy for the temperture control, we still feed in 
1286                    EkinAveVel because it's needed for the pressure */
1287                 
1288                 /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1289                 if (!bInitStep) 
1290                 {
1291                     if (bTrotter)
1292                     {
1293                         trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
1294                     } 
1295                     else 
1296                     {
1297                         if (bExchanged)
1298                         {
1299             
1300                             /* We need the kinetic energy at minus the half step for determining
1301                              * the full step kinetic energy and possibly for T-coupling.*/
1302                             /* This may not be quite working correctly yet . . . . */
1303                             compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1304                                             wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
1305                                             constr,NULL,FALSE,state->box,
1306                                             top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1307                                             CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
1308                         }
1309
1310
1311                         update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
1312                     }
1313                 }
1314                 
1315                 if (bIterations &&
1316                     done_iterating(cr,fplog,step,&iterate,bFirstIterate,
1317                                    state->veta,&vetanew)) 
1318                 {
1319                     break;
1320                 }
1321                 bFirstIterate = FALSE;
1322             }
1323
1324             if (bTrotter && !bInitStep) {
1325                 enerd->term[F_DVDL_BONDED] += dvdl;        /* only add after iterations */
1326                 copy_mat(shake_vir,state->svir_prev);
1327                 copy_mat(force_vir,state->fvir_prev);
1328                 if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
1329                     /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1330                     enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
1331                     enerd->term[F_EKIN] = trace(ekind->ekin);
1332                 }
1333             }
1334             /* if it's the initial step, we performed this first step just to get the constraint virial */
1335             if (bInitStep && ir->eI==eiVV) {
1336                 copy_rvecn(cbuf,state->v,0,state->natoms);
1337             }
1338             
1339             if (fr->bSepDVDL && fplog && do_log) 
1340             {
1341                 fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
1342             }
1343             enerd->term[F_DVDL_BONDED] += dvdl;
1344         }
1345     
1346         /* MRS -- now done iterating -- compute the conserved quantity */
1347         if (bVV) {
1348             saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
1349             if (ir->eI==eiVV) 
1350             {
1351                 last_ekin = enerd->term[F_EKIN];
1352             }
1353             if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
1354             {
1355                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1356             }
1357             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
1358             sum_dhdl(enerd,state->lambda,ir->fepvals);
1359         }
1360         
1361         /* ########  END FIRST UPDATE STEP  ############## */
1362         /* ########  If doing VV, we now have v(dt) ###### */
1363         if (bDoExpanded) {
1364             /* perform extended ensemble sampling in lambda - we don't
1365                actually move to the new state before outputting
1366                statistics, but if performing simulated tempering, we
1367                do update the velocities and the tau_t. */
1368         
1369             lamnew = ExpandedEnsembleDynamics(fplog,ir,enerd,state,&MassQ,&df_history,step,mcrng,state->v,mdatoms);
1370         }
1371         /* ################## START TRAJECTORY OUTPUT ################# */
1372         
1373         /* Now we have the energies and forces corresponding to the 
1374          * coordinates at time t. We must output all of this before
1375          * the update.
1376          * for RerunMD t is read from input trajectory
1377          */
1378         mdof_flags = 0;
1379         if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
1380         if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
1381         if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
1382         if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
1383         if (bCPT) { mdof_flags |= MDOF_CPT; };
1384
1385 #if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
1386         if (bLastStep)
1387         {
1388             /* Enforce writing positions and velocities at end of run */
1389             mdof_flags |= (MDOF_X | MDOF_V);
1390         }
1391 #endif
1392 #ifdef GMX_FAHCORE
1393         if (MASTER(cr))
1394             fcReportProgress( ir->nsteps, step );
1395
1396         /* sync bCPT and fc record-keeping */
1397         if (bCPT && MASTER(cr))
1398             fcRequestCheckPoint();
1399 #endif
1400         
1401         if (mdof_flags != 0)
1402         {
1403             wallcycle_start(wcycle,ewcTRAJ);
1404             if (bCPT)
1405             {
1406                 if (state->flags & (1<<estLD_RNG))
1407                 {
1408                     get_stochd_state(upd,state);
1409                 }
1410                 if (state->flags  & (1<<estMC_RNG))
1411                 {
1412                     get_mc_state(mcrng,state);
1413                 }
1414                 if (MASTER(cr))
1415                 {
1416                     if (bSumEkinhOld)
1417                     {
1418                         state_global->ekinstate.bUpToDate = FALSE;
1419                     }
1420                     else
1421                     {
1422                         update_ekinstate(&state_global->ekinstate,ekind);
1423                         state_global->ekinstate.bUpToDate = TRUE;
1424                     }
1425                     update_energyhistory(&state_global->enerhist,mdebin);
1426                     if (ir->efep!=efepNO || ir->bSimTemp) 
1427                     {
1428                         state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
1429                                                                        structured so this isn't necessary.
1430                                                                        Note this reassignment is only necessary
1431                                                                        for single threads.*/
1432                         copy_df_history(&state_global->dfhist,&df_history);
1433                     }
1434                 }
1435             }
1436             write_traj(fplog,cr,outf,mdof_flags,top_global,
1437                        step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
1438             if (bCPT)
1439             {
1440                 nchkpt++;
1441                 bCPT = FALSE;
1442             }
1443             debug_gmx();
1444             if (bLastStep && step_rel == ir->nsteps &&
1445                 (Flags & MD_CONFOUT) && MASTER(cr) &&
1446                 !bRerunMD && !bFFscan)
1447             {
1448                 /* x and v have been collected in write_traj,
1449                  * because a checkpoint file will always be written
1450                  * at the last step.
1451                  */
1452                 fprintf(stderr,"\nWriting final coordinates.\n");
1453                 if (fr->bMolPBC)
1454                 {
1455                     /* Make molecules whole only for confout writing */
1456                     do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
1457                 }
1458                 write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
1459                                     *top_global->name,top_global,
1460                                     state_global->x,state_global->v,
1461                                     ir->ePBC,state->box);
1462                 debug_gmx();
1463             }
1464             wallcycle_stop(wcycle,ewcTRAJ);
1465         }
1466         
1467         /* kludge -- virial is lost with restart for NPT control. Must restart */
1468         if (bStartingFromCpt && bVV) 
1469         {
1470             copy_mat(state->svir_prev,shake_vir);
1471             copy_mat(state->fvir_prev,force_vir);
1472         }
1473         /*  ################## END TRAJECTORY OUTPUT ################ */
1474         
1475         /* Determine the wallclock run time up till now */
1476         run_time = gmx_gettime() - (double)runtime->real;
1477
1478         /* Check whether everything is still allright */    
1479         if (((int)gmx_get_stop_condition() > handled_stop_condition)
1480 #ifdef GMX_THREAD_MPI
1481             && MASTER(cr)
1482 #endif
1483             )
1484         {
1485             /* this is just make gs.sig compatible with the hack 
1486                of sending signals around by MPI_Reduce with together with
1487                other floats */
1488             if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
1489                 gs.sig[eglsSTOPCOND]=1;
1490             if ( gmx_get_stop_condition() == gmx_stop_cond_next )
1491                 gs.sig[eglsSTOPCOND]=-1;
1492             /* < 0 means stop at next step, > 0 means stop at next NS step */
1493             if (fplog)
1494             {
1495                 fprintf(fplog,
1496                         "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
1497                         gmx_get_signal_name(),
1498                         gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
1499                 fflush(fplog);
1500             }
1501             fprintf(stderr,
1502                     "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
1503                     gmx_get_signal_name(),
1504                     gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
1505             fflush(stderr);
1506             handled_stop_condition=(int)gmx_get_stop_condition();
1507         }
1508         else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
1509                  (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
1510                  gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
1511         {
1512             /* Signal to terminate the run */
1513             gs.sig[eglsSTOPCOND] = 1;
1514             if (fplog)
1515             {
1516                 fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
1517             }
1518             fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
1519         }
1520
1521         if (bResetCountersHalfMaxH && MASTER(cr) &&
1522             run_time > max_hours*60.0*60.0*0.495)
1523         {
1524             gs.sig[eglsRESETCOUNTERS] = 1;
1525         }
1526
1527         if (ir->nstlist == -1 && !bRerunMD)
1528         {
1529             /* When bGStatEveryStep=FALSE, global_stat is only called
1530              * when we check the atom displacements, not at NS steps.
1531              * This means that also the bonded interaction count check is not
1532              * performed immediately after NS. Therefore a few MD steps could
1533              * be performed with missing interactions.
1534              * But wrong energies are never written to file,
1535              * since energies are only written after global_stat
1536              * has been called.
1537              */
1538             if (step >= nlh.step_nscheck)
1539             {
1540                 nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
1541                                                      nlh.scale_tot,state->x);
1542             }
1543             else
1544             {
1545                 /* This is not necessarily true,
1546                  * but step_nscheck is determined quite conservatively.
1547                  */
1548                 nlh.nabnsb = 0;
1549             }
1550         }
1551
1552         /* In parallel we only have to check for checkpointing in steps
1553          * where we do global communication,
1554          *  otherwise the other nodes don't know.
1555          */
1556         if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
1557                            cpt_period >= 0 &&
1558                            (cpt_period == 0 || 
1559                             run_time >= nchkpt*cpt_period*60.0)) &&
1560             gs.set[eglsCHKPT] == 0)
1561         {
1562             gs.sig[eglsCHKPT] = 1;
1563         }
1564   
1565
1566         /* at the start of step, randomize the velocities */
1567         if (ETC_ANDERSEN(ir->etc) && EI_VV(ir->eI))
1568         {
1569             gmx_bool bDoAndersenConstr;
1570             bDoAndersenConstr = (constr && update_randomize_velocities(ir,step,mdatoms,state,upd,&top->idef,constr));
1571             /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1572             if (bDoAndersenConstr)
1573             {
1574                 update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
1575                                    state,fr->bMolPBC,graph,f,
1576                                    &top->idef,tmp_vir,NULL,
1577                                    cr,nrnb,wcycle,upd,constr,
1578                                    bInitStep,TRUE,bCalcVir,vetanew);
1579             }
1580         }
1581
1582         if (bIterations)
1583         {
1584             gmx_iterate_init(&iterate,bIterations);
1585         }
1586     
1587         /* for iterations, we save these vectors, as we will be redoing the calculations */
1588         if (bIterations && iterate.bIterate) 
1589         {
1590             copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
1591         }
1592         bFirstIterate = TRUE;
1593         while (bFirstIterate || (bIterations && iterate.bIterate))
1594         {
1595             /* We now restore these vectors to redo the calculation with improved extended variables */    
1596             if (bIterations) 
1597             { 
1598                 copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
1599             }
1600
1601             /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
1602                so scroll down for that logic */
1603             
1604             /* #########   START SECOND UPDATE STEP ################# */
1605             /* Box is changed in update() when we do pressure coupling,
1606              * but we should still use the old box for energy corrections and when
1607              * writing it to the energy file, so it matches the trajectory files for
1608              * the same timestep above. Make a copy in a separate array.
1609              */
1610             copy_mat(state->box,lastbox);
1611
1612             bOK = TRUE;
1613             if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
1614             {
1615                 wallcycle_start(wcycle,ewcUPDATE);
1616                 dvdl = 0;
1617                 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1618                 if (bTrotter) 
1619                 {
1620                     if (bIterations && iterate.bIterate) 
1621                     {
1622                         if (bFirstIterate) 
1623                         {
1624                             scalevir = 1;
1625                         }
1626                         else 
1627                         {
1628                             /* we use a new value of scalevir to converge the iterations faster */
1629                             scalevir = tracevir/trace(shake_vir);
1630                         }
1631                         msmul(shake_vir,scalevir,shake_vir); 
1632                         m_add(force_vir,shake_vir,total_vir);
1633                         clear_mat(shake_vir);
1634                     }
1635                     trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
1636                 /* We can only do Berendsen coupling after we have summed
1637                  * the kinetic energy or virial. Since the happens
1638                  * in global_state after update, we should only do it at
1639                  * step % nstlist = 1 with bGStatEveryStep=FALSE.
1640                  */
1641                 }
1642                 else 
1643                 {
1644                     update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
1645                     update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
1646                                    upd,bInitStep);
1647                 }
1648
1649                 if (bVV)
1650                 {
1651                     bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
1652
1653                     /* velocity half-step update */
1654                     update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
1655                                   bUpdateDoLR,fr->f_twin,fcd,
1656                                   ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
1657                                   cr,nrnb,constr,&top->idef);
1658                 }
1659
1660                 /* Above, initialize just copies ekinh into ekin,
1661                  * it doesn't copy position (for VV),
1662                  * and entire integrator for MD.
1663                  */
1664                 
1665                 if (ir->eI==eiVVAK) 
1666                 {
1667                     copy_rvecn(state->x,cbuf,0,state->natoms);
1668                 }
1669                 bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
1670
1671                 update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
1672                               bUpdateDoLR,fr->f_twin,fcd,
1673                               ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
1674                 wallcycle_stop(wcycle,ewcUPDATE);
1675
1676                 update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,
1677                                    fr->bMolPBC,graph,f,
1678                                    &top->idef,shake_vir,force_vir,
1679                                    cr,nrnb,wcycle,upd,constr,
1680                                    bInitStep,FALSE,bCalcVir,state->veta);  
1681                 
1682                 if (ir->eI==eiVVAK) 
1683                 {
1684                     /* erase F_EKIN and F_TEMP here? */
1685                     /* just compute the kinetic energy at the half step to perform a trotter step */
1686                     compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1687                                     wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
1688                                     constr,NULL,FALSE,lastbox,
1689                                     top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1690                                     cglo_flags | CGLO_TEMPERATURE    
1691                         );
1692                     wallcycle_start(wcycle,ewcUPDATE);
1693                     trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
1694                     /* now we know the scaling, we can compute the positions again again */
1695                     copy_rvecn(cbuf,state->x,0,state->natoms);
1696
1697                     bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
1698
1699                     update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
1700                                   bUpdateDoLR,fr->f_twin,fcd,
1701                                   ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
1702                     wallcycle_stop(wcycle,ewcUPDATE);
1703
1704                     /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
1705                     /* are the small terms in the shake_vir here due
1706                      * to numerical errors, or are they important
1707                      * physically? I'm thinking they are just errors, but not completely sure. 
1708                      * For now, will call without actually constraining, constr=NULL*/
1709                     update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
1710                                        state,fr->bMolPBC,graph,f,
1711                                        &top->idef,tmp_vir,force_vir,
1712                                        cr,nrnb,wcycle,upd,NULL,
1713                                        bInitStep,FALSE,bCalcVir,
1714                                        state->veta);  
1715                 }
1716                 if (!bOK && !bFFscan) 
1717                 {
1718                     gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
1719                 }
1720                 
1721                 if (fr->bSepDVDL && fplog && do_log) 
1722                 {
1723                     fprintf(fplog,sepdvdlformat,"Constraint dV/dl",0.0,dvdl);
1724                 }
1725                 enerd->term[F_DVDL_BONDED] += dvdl;
1726             } 
1727             else if (graph) 
1728             {
1729                 /* Need to unshift here */
1730                 unshift_self(graph,state->box,state->x);
1731             }
1732
1733             if (vsite != NULL) 
1734             {
1735                 wallcycle_start(wcycle,ewcVSITECONSTR);
1736                 if (graph != NULL) 
1737                 {
1738                     shift_self(graph,state->box,state->x);
1739                 }
1740                 construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
1741                                  top->idef.iparams,top->idef.il,
1742                                  fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1743                 
1744                 if (graph != NULL) 
1745                 {
1746                     unshift_self(graph,state->box,state->x);
1747                 }
1748                 wallcycle_stop(wcycle,ewcVSITECONSTR);
1749             }
1750             
1751             /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
1752             /* With Leap-Frog we can skip compute_globals at
1753              * non-communication steps, but we need to calculate
1754              * the kinetic energy one step before communication.
1755              */
1756             if (bGStat || do_per_step(step+1,nstglobalcomm) ||
1757                 EI_VV(ir->eI))
1758             {
1759                 if (ir->nstlist == -1 && bFirstIterate)
1760                 {
1761                     gs.sig[eglsNABNSB] = nlh.nabnsb;
1762                 }
1763                 compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
1764                                 wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
1765                                 constr,
1766                                 bFirstIterate ? &gs : NULL, 
1767                                 (step_rel % gs.nstms == 0) && 
1768                                 (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
1769                                 lastbox,
1770                                 top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
1771                                 cglo_flags 
1772                                 | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0)
1773                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1774                                 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
1775                                 | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) 
1776                                 | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0) 
1777                                 | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
1778                                 | CGLO_CONSTRAINT 
1779                     );
1780                 if (ir->nstlist == -1 && bFirstIterate)
1781                 {
1782                     nlh.nabnsb = gs.set[eglsNABNSB];
1783                     gs.set[eglsNABNSB] = 0;
1784                 }
1785             }
1786             /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
1787             /* #############  END CALC EKIN AND PRESSURE ################# */
1788         
1789             /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1790                the virial that should probably be addressed eventually. state->veta has better properies,
1791                but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1792                generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
1793
1794             if (bIterations && 
1795                 done_iterating(cr,fplog,step,&iterate,bFirstIterate,
1796                                trace(shake_vir),&tracevir)) 
1797             {
1798                 break;
1799             }
1800             bFirstIterate = FALSE;
1801         }
1802
1803         /* only add constraint dvdl after constraints */
1804         enerd->term[F_DVDL_BONDED] += dvdl;
1805         if (!bVV)
1806         {
1807             /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
1808             sum_dhdl(enerd,state->lambda,ir->fepvals);
1809         }
1810         update_box(fplog,step,ir,mdatoms,state,graph,f,
1811                    ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
1812         
1813         /* ################# END UPDATE STEP 2 ################# */
1814         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
1815     
1816         /* The coordinates (x) were unshifted in update */
1817         if (bFFscan && (shellfc==NULL || bConverged))
1818         {
1819             if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
1820                                  f,NULL,xcopy,
1821                                  &(top_global->mols),mdatoms->massT,pres))
1822             {
1823                 gmx_finalize_par();
1824
1825                 fprintf(stderr,"\n");
1826                 exit(0);
1827             }
1828         }
1829         if (!bGStat)
1830         {
1831             /* We will not sum ekinh_old,                                                            
1832              * so signal that we still have to do it.                                                
1833              */
1834             bSumEkinhOld = TRUE;
1835         }
1836         
1837         if (bTCR)
1838         {
1839             /* Only do GCT when the relaxation of shells (minimization) has converged,
1840              * otherwise we might be coupling to bogus energies. 
1841              * In parallel we must always do this, because the other sims might
1842              * update the FF.
1843              */
1844
1845             /* Since this is called with the new coordinates state->x, I assume
1846              * we want the new box state->box too. / EL 20040121
1847              */
1848             do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
1849                         ir,MASTER(cr),
1850                         mdatoms,&(top->idef),mu_aver,
1851                         top_global->mols.nr,cr,
1852                         state->box,total_vir,pres,
1853                         mu_tot,state->x,f,bConverged);
1854             debug_gmx();
1855         }
1856
1857         /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
1858         
1859         /* use the directly determined last velocity, not actually the averaged half steps */
1860         if (bTrotter && ir->eI==eiVV) 
1861         {
1862             enerd->term[F_EKIN] = last_ekin;
1863         }
1864         enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1865         
1866         if (bVV)
1867         {
1868             enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1869         }
1870         else 
1871         {
1872             enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
1873         }
1874         /* Check for excessively large energies */
1875         if (bIonize) 
1876         {
1877 #ifdef GMX_DOUBLE
1878             real etot_max = 1e200;
1879 #else
1880             real etot_max = 1e30;
1881 #endif
1882             if (fabs(enerd->term[F_ETOT]) > etot_max) 
1883             {
1884                 fprintf(stderr,"Energy too large (%g), giving up\n",
1885                         enerd->term[F_ETOT]);
1886             }
1887         }
1888         /* #########  END PREPARING EDR OUTPUT  ###########  */
1889         
1890         /* Time for performance */
1891         if (((step % stepout) == 0) || bLastStep) 
1892         {
1893             runtime_upd_proc(runtime);
1894         }
1895         
1896         /* Output stuff */
1897         if (MASTER(cr))
1898         {
1899             gmx_bool do_dr,do_or;
1900             
1901             if (fplog && do_log && bDoExpanded)
1902             {
1903                 /* only needed if doing expanded ensemble */
1904                 PrintFreeEnergyInfoToFile(fplog,ir->fepvals,ir->expandedvals,ir->bSimTemp?ir->simtempvals:NULL,
1905                                           &df_history,state->fep_state,ir->nstlog,step);
1906             }
1907             if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
1908             {
1909                 if (bCalcEner)
1910                 {
1911                     upd_mdebin(mdebin,bDoDHDL, TRUE,
1912                                t,mdatoms->tmass,enerd,state,
1913                                ir->fepvals,ir->expandedvals,lastbox,
1914                                shake_vir,force_vir,total_vir,pres,
1915                                ekind,mu_tot,constr);
1916                 }
1917                 else
1918                 {
1919                     upd_mdebin_step(mdebin);
1920                 }
1921                 
1922                 do_dr  = do_per_step(step,ir->nstdisreout);
1923                 do_or  = do_per_step(step,ir->nstorireout);
1924                 
1925                 print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
1926                            step,t,
1927                            eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
1928             }
1929             if (ir->ePull != epullNO)
1930             {
1931                 pull_print_output(ir->pull,step,t);
1932             }
1933             
1934             if (do_per_step(step,ir->nstlog))
1935             {
1936                 if(fflush(fplog) != 0)
1937                 {
1938                     gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
1939                 }
1940             }
1941         }
1942         if (bDoExpanded)
1943         {
1944             /* Have to do this part after outputting the logfile and the edr file */
1945             state->fep_state = lamnew;
1946             for (i=0;i<efptNR;i++)
1947             {
1948                 state->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
1949             }
1950         }
1951         /* Remaining runtime */
1952         if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
1953         {
1954             if (shellfc) 
1955             {
1956                 fprintf(stderr,"\n");
1957             }
1958             print_time(stderr,runtime,step,ir,cr);
1959         }
1960
1961         /* Replica exchange */
1962         bExchanged = FALSE;
1963         if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
1964             do_per_step(step,repl_ex_nst)) 
1965         {
1966             bExchanged = replica_exchange(fplog,cr,repl_ex,
1967                                           state_global,enerd,
1968                                           state,step,t);
1969
1970             if (bExchanged && DOMAINDECOMP(cr)) 
1971             {
1972                 dd_partition_system(fplog,step,cr,TRUE,1,
1973                                     state_global,top_global,ir,
1974                                     state,&f,mdatoms,top,fr,
1975                                     vsite,shellfc,constr,
1976                                     nrnb,wcycle,FALSE);
1977             }
1978         }
1979         
1980         bFirstStep = FALSE;
1981         bInitStep = FALSE;
1982         bStartingFromCpt = FALSE;
1983
1984         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1985         /* With all integrators, except VV, we need to retain the pressure
1986          * at the current step for coupling at the next step.
1987          */
1988         if ((state->flags & (1<<estPRES_PREV)) &&
1989             (bGStatEveryStep ||
1990              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1991         {
1992             /* Store the pressure in t_state for pressure coupling
1993              * at the next MD step.
1994              */
1995             copy_mat(pres,state->pres_prev);
1996         }
1997         
1998         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
1999
2000         if ( (membed!=NULL) && (!bLastStep) )
2001         {
2002             rescale_membed(step_rel,membed,state_global->x);
2003         }
2004
2005         if (bRerunMD) 
2006         {
2007             if (MASTER(cr))
2008             {
2009                 /* read next frame from input trajectory */
2010                 bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
2011             }
2012
2013             if (PAR(cr))
2014             {
2015                 rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
2016             }
2017         }
2018         
2019         if (!bRerunMD || !rerun_fr.bStep)
2020         {
2021             /* increase the MD step number */
2022             step++;
2023             step_rel++;
2024         }
2025         
2026         cycles = wallcycle_stop(wcycle,ewcSTEP);
2027         if (DOMAINDECOMP(cr) && wcycle)
2028         {
2029             dd_cycles_add(cr->dd,cycles,ddCyclStep);
2030         }
2031
2032         if (bPMETuneRunning || bPMETuneTry)
2033         {
2034             /* PME grid + cut-off optimization with GPUs or PME nodes */
2035
2036             /* Count the total cycles over the last steps */
2037             cycles_pmes += cycles;
2038
2039             /* We can only switch cut-off at NS steps */
2040             if (step % ir->nstlist == 0)
2041             {
2042                 /* PME grid + cut-off optimization with GPUs or PME nodes */
2043                 if (bPMETuneTry)
2044                 {
2045                     if (DDMASTER(cr->dd))
2046                     {
2047                         /* PME node load is too high, start tuning */
2048                         bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
2049                     }
2050                     dd_bcast(cr->dd,sizeof(gmx_bool),&bPMETuneRunning);
2051
2052                     if (bPMETuneRunning || step_rel > ir->nstlist*50)
2053                     {
2054                         bPMETuneTry     = FALSE;
2055                     }
2056                 }
2057                 if (bPMETuneRunning)
2058                 {
2059                     /* init_step might not be a multiple of nstlist,
2060                      * but the first cycle is always skipped anyhow.
2061                      */
2062                     bPMETuneRunning =
2063                         pme_load_balance(pme_loadbal,cr,
2064                                          (bVerbose && MASTER(cr)) ? stderr : NULL,
2065                                          fplog,
2066                                          ir,state,cycles_pmes,
2067                                          fr->ic,fr->nbv,&fr->pmedata,
2068                                          step);
2069
2070                     /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
2071                     fr->ewaldcoeff = fr->ic->ewaldcoeff;
2072                     fr->rlist      = fr->ic->rlist;
2073                     fr->rlistlong  = fr->ic->rlistlong;
2074                     fr->rcoulomb   = fr->ic->rcoulomb;
2075                     fr->rvdw       = fr->ic->rvdw;
2076                 }
2077                 cycles_pmes = 0;
2078             }
2079         }
2080
2081         if (step_rel == wcycle_get_reset_counters(wcycle) ||
2082             gs.set[eglsRESETCOUNTERS] != 0)
2083         {
2084             /* Reset all the counters related to performance over the run */
2085             reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime,
2086                                fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
2087             wcycle_set_reset_counters(wcycle,-1);
2088             /* Correct max_hours for the elapsed time */
2089             max_hours -= run_time/(60.0*60.0);
2090             bResetCountersHalfMaxH = FALSE;
2091             gs.set[eglsRESETCOUNTERS] = 0;
2092         }
2093
2094     }
2095     /* End of main MD loop */
2096     debug_gmx();
2097     
2098     /* Stop the time */
2099     runtime_end(runtime);
2100     
2101     if (bRerunMD && MASTER(cr))
2102     {
2103         close_trj(status);
2104     }
2105     
2106     if (!(cr->duty & DUTY_PME))
2107     {
2108         /* Tell the PME only node to finish */
2109         gmx_pme_send_finish(cr);
2110     }
2111     
2112     if (MASTER(cr))
2113     {
2114         if (ir->nstcalcenergy > 0 && !bRerunMD) 
2115         {
2116             print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
2117                        eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
2118         }
2119     }
2120
2121     done_mdoutf(outf);
2122
2123     debug_gmx();
2124
2125     if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
2126     {
2127         fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
2128         fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
2129     }
2130
2131     if (pme_loadbal != NULL)
2132     {
2133         pme_loadbal_done(pme_loadbal,fplog);
2134     }
2135
2136     if (shellfc && fplog)
2137     {
2138         fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
2139                 (nconverged*100.0)/step_rel);
2140         fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
2141                 tcount/step_rel);
2142     }
2143     
2144     if (repl_ex_nst > 0 && MASTER(cr))
2145     {
2146         print_replica_exchange_statistics(fplog,repl_ex);
2147     }
2148     
2149     runtime->nsteps_done = step_rel;
2150
2151    return 0;
2152 }