added Verlet scheme and NxN non-bonded functionality
[alexxy/gromacs.git] / src / mdlib / update.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  *          GROningen MAchine for Chemical Simulations
9  *
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  *
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  *
31  * For more info, check our website at http://www.gromacs.org
32  *
33  * And Hey:
34  * GROwing Monsters And Cloning Shrimps
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40
41 #include <stdio.h>
42 #include <math.h>
43
44 #include "types/commrec.h"
45 #include "sysstuff.h"
46 #include "smalloc.h"
47 #include "typedefs.h"
48 #include "nrnb.h"
49 #include "physics.h"
50 #include "macros.h"
51 #include "vec.h"
52 #include "main.h"
53 #include "confio.h"
54 #include "update.h"
55 #include "gmx_random.h"
56 #include "futil.h"
57 #include "mshift.h"
58 #include "tgroup.h"
59 #include "force.h"
60 #include "names.h"
61 #include "txtdump.h"
62 #include "mdrun.h"
63 #include "copyrite.h"
64 #include "constr.h"
65 #include "edsam.h"
66 #include "pull.h"
67 #include "disre.h"
68 #include "orires.h"
69 #include "gmx_wallcycle.h"
70 #include "gmx_omp_nthreads.h"
71
72 /*For debugging, start at v(-dt/2) for velolcity verlet -- uncomment next line */
73 /*#define STARTFROMDT2*/
74
75 typedef struct {
76   double gdt;
77   double eph;
78   double emh;
79   double em;
80   double b;
81   double c;
82   double d;
83 } gmx_sd_const_t;
84
85 typedef struct {
86   real V;
87   real X;
88   real Yv;
89   real Yx;
90 } gmx_sd_sigma_t;
91
92 typedef struct {
93   /* The random state */
94   gmx_rng_t gaussrand;
95   /* BD stuff */
96   real *bd_rf;
97   /* SD stuff */
98   gmx_sd_const_t *sdc;
99   gmx_sd_sigma_t *sdsig;
100   rvec *sd_V;
101   int  sd_V_nalloc;
102   /* andersen temperature control stuff */
103   gmx_bool *randomize_group;
104   real *boltzfac;
105 } gmx_stochd_t;
106
107 typedef struct gmx_update
108 {
109     gmx_stochd_t *sd;
110     /* xprime for constraint algorithms */
111     rvec *xp;
112     int  xp_nalloc;
113
114     /* variable size arrays for andersen */
115     gmx_bool *randatom;
116     int *randatom_list;
117     gmx_bool randatom_list_init;
118
119     /* Variables for the deform algorithm */
120     gmx_large_int_t deformref_step;
121     matrix     deformref_box;
122 } t_gmx_update;
123
124
125 static void do_update_md(int start,int nrend,double dt,
126                          t_grp_tcstat *tcstat,
127                          double nh_vxi[],
128                          gmx_bool bNEMD,t_grp_acc *gstat,rvec accel[],
129                          ivec nFreeze[],
130                          real invmass[],
131                          unsigned short ptype[],unsigned short cFREEZE[],
132                          unsigned short cACC[],unsigned short cTC[],
133                          rvec x[],rvec xprime[],rvec v[],
134                          rvec f[],matrix M,
135                          gmx_bool bNH,gmx_bool bPR)
136 {
137   double imass,w_dt;
138   int    gf=0,ga=0,gt=0;
139   rvec   vrel;
140   real   vn,vv,va,vb,vnrel;
141   real   lg,vxi=0,u;
142   int    n,d;
143
144   if (bNH || bPR)
145   {
146       /* Update with coupling to extended ensembles, used for
147        * Nose-Hoover and Parrinello-Rahman coupling
148        * Nose-Hoover uses the reversible leap-frog integrator from
149        * Holian et al. Phys Rev E 52(3) : 2338, 1995
150        */
151       for(n=start; n<nrend; n++)
152       {
153           imass = invmass[n];
154           if (cFREEZE)
155           {
156               gf   = cFREEZE[n];
157           }
158           if (cACC)
159           {
160               ga   = cACC[n];
161           }
162           if (cTC)
163           {
164               gt   = cTC[n];
165           }
166           lg   = tcstat[gt].lambda;
167           if (bNH) {
168               vxi   = nh_vxi[gt];
169           }
170           rvec_sub(v[n],gstat[ga].u,vrel);
171
172           for(d=0; d<DIM; d++)
173           {
174               if((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
175               {
176                   vnrel = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
177                                             - iprod(M[d],vrel)))/(1 + 0.5*vxi*dt);
178                   /* do not scale the mean velocities u */
179                   vn             = gstat[ga].u[d] + accel[ga][d]*dt + vnrel;
180                   v[n][d]        = vn;
181                   xprime[n][d]   = x[n][d]+vn*dt;
182               }
183               else
184               {
185                   v[n][d]        = 0.0;
186                   xprime[n][d]   = x[n][d];
187               }
188           }
189       }
190   } 
191   else if (cFREEZE != NULL ||
192            nFreeze[0][XX] || nFreeze[0][YY] || nFreeze[0][ZZ] ||
193            bNEMD)
194   {
195       /* Update with Berendsen/v-rescale coupling and freeze or NEMD */
196       for(n=start; n<nrend; n++) 
197       {
198           w_dt = invmass[n]*dt;
199           if (cFREEZE)
200           {
201               gf   = cFREEZE[n];
202           }
203           if (cACC)
204           {
205               ga   = cACC[n];
206           }
207           if (cTC)
208           {
209               gt   = cTC[n];
210           }
211           lg   = tcstat[gt].lambda;
212
213           for(d=0; d<DIM; d++)
214           {
215               vn             = v[n][d];
216               if((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
217               {
218                   vv             = lg*vn + f[n][d]*w_dt;
219
220                   /* do not scale the mean velocities u */
221                   u              = gstat[ga].u[d];
222                   va             = vv + accel[ga][d]*dt;
223                   vb             = va + (1.0-lg)*u;
224                   v[n][d]        = vb;
225                   xprime[n][d]   = x[n][d]+vb*dt;
226               }
227               else
228               {
229                   v[n][d]        = 0.0;
230                   xprime[n][d]   = x[n][d];
231               }
232           }
233       }
234   }
235     else
236     {
237         /* Plain update with Berendsen/v-rescale coupling */
238         for(n=start; n<nrend; n++) 
239         {
240             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
241             {
242                 w_dt = invmass[n]*dt;
243                 if (cTC)
244                 {
245                     gt = cTC[n];
246                 }
247                 lg = tcstat[gt].lambda;
248
249                 for(d=0; d<DIM; d++)
250                 {
251                     vn           = lg*v[n][d] + f[n][d]*w_dt;
252                     v[n][d]      = vn;
253                     xprime[n][d] = x[n][d] + vn*dt;
254                 }
255             }
256             else
257             {
258                 for(d=0; d<DIM; d++)
259                 {
260                     v[n][d]        = 0.0;
261                     xprime[n][d]   = x[n][d];
262                 }
263             }
264         }
265     }
266 }
267
268 static void do_update_vv_vel(int start,int nrend,double dt,
269                              t_grp_tcstat *tcstat,t_grp_acc *gstat,
270                              rvec accel[],ivec nFreeze[],real invmass[],
271                              unsigned short ptype[],unsigned short cFREEZE[],
272                              unsigned short cACC[],rvec v[],rvec f[],
273                              gmx_bool bExtended, real veta, real alpha)
274 {
275     double imass,w_dt;
276     int    gf=0,ga=0;
277     rvec   vrel;
278     real   u,vn,vv,va,vb,vnrel;
279     int    n,d;
280     double g,mv1,mv2;
281
282     if (bExtended)
283     {
284         g        = 0.25*dt*veta*alpha;
285         mv1      = exp(-g);
286         mv2      = series_sinhx(g);
287     }
288     else
289     {
290         mv1      = 1.0;
291         mv2      = 1.0;
292     }
293     for(n=start; n<nrend; n++)
294     {
295         w_dt = invmass[n]*dt;
296         if (cFREEZE)
297         {
298             gf   = cFREEZE[n];
299         }
300         if (cACC)
301         {
302             ga   = cACC[n];
303         }
304
305         for(d=0; d<DIM; d++)
306         {
307             if((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
308             {
309                 v[n][d]             = mv1*(mv1*v[n][d] + 0.5*(w_dt*mv2*f[n][d]))+0.5*accel[ga][d]*dt;
310             }
311             else
312             {
313                 v[n][d]        = 0.0;
314             }
315         }
316     }
317 } /* do_update_vv_vel */
318
319 static void do_update_vv_pos(int start,int nrend,double dt,
320                              t_grp_tcstat *tcstat,t_grp_acc *gstat,
321                              rvec accel[],ivec nFreeze[],real invmass[],
322                              unsigned short ptype[],unsigned short cFREEZE[],
323                              rvec x[],rvec xprime[],rvec v[],
324                              rvec f[],gmx_bool bExtended, real veta, real alpha)
325 {
326   double imass,w_dt;
327   int    gf=0;
328   int    n,d;
329   double g,mr1,mr2;
330
331   /* Would it make more sense if Parrinello-Rahman was put here? */
332   if (bExtended)
333   {
334       g        = 0.5*dt*veta;
335       mr1      = exp(g);
336       mr2      = series_sinhx(g);
337   } else {
338       mr1      = 1.0;
339       mr2      = 1.0;
340   }
341
342   for(n=start; n<nrend; n++) {
343
344       if (cFREEZE)
345       {
346           gf   = cFREEZE[n];
347       }
348
349       for(d=0; d<DIM; d++)
350       {
351           if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
352           {
353               xprime[n][d]   = mr1*(mr1*x[n][d]+mr2*dt*v[n][d]);
354           }
355           else
356           {
357               xprime[n][d]   = x[n][d];
358           }
359       }
360   }
361 }/* do_update_vv_pos */
362
363 static void do_update_visc(int start,int nrend,double dt,
364                            t_grp_tcstat *tcstat,
365                            double nh_vxi[],
366                            real invmass[],
367                            unsigned short ptype[],unsigned short cTC[],
368                            rvec x[],rvec xprime[],rvec v[],
369                            rvec f[],matrix M,matrix box,real
370                            cos_accel,real vcos,
371                            gmx_bool bNH,gmx_bool bPR)
372 {
373     double imass,w_dt;
374     int    gt=0;
375     real   vn,vc;
376     real   lg,vxi=0,vv;
377     real   fac,cosz;
378     rvec   vrel;
379     int    n,d;
380
381     fac = 2*M_PI/(box[ZZ][ZZ]);
382
383     if (bNH || bPR) {
384         /* Update with coupling to extended ensembles, used for
385          * Nose-Hoover and Parrinello-Rahman coupling
386          */
387         for(n=start; n<nrend; n++) {
388             imass = invmass[n];
389             if (cTC)
390             {
391                 gt   = cTC[n];
392             }
393             lg   = tcstat[gt].lambda;
394             cosz = cos(fac*x[n][ZZ]);
395
396             copy_rvec(v[n],vrel);
397
398             vc            = cosz*vcos;
399             vrel[XX]     -= vc;
400             if (bNH)
401             {
402                 vxi        = nh_vxi[gt];
403             }
404             for(d=0; d<DIM; d++)
405             {
406                 vn             = v[n][d];
407
408                 if((ptype[n] != eptVSite) && (ptype[n] != eptShell))
409                 {
410                     vn  = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
411                                             - iprod(M[d],vrel)))/(1 + 0.5*vxi*dt);
412                     if(d == XX)
413                     {
414                         vn += vc + dt*cosz*cos_accel;
415                     }
416                     v[n][d]        = vn;
417                     xprime[n][d]   = x[n][d]+vn*dt;
418                 }
419                 else
420                 {
421                     xprime[n][d]   = x[n][d];
422                 }
423             }
424         }
425     }
426     else
427     {
428         /* Classic version of update, used with berendsen coupling */
429         for(n=start; n<nrend; n++)
430         {
431             w_dt = invmass[n]*dt;
432             if (cTC)
433             {
434                 gt   = cTC[n];
435             }
436             lg   = tcstat[gt].lambda;
437             cosz = cos(fac*x[n][ZZ]);
438
439             for(d=0; d<DIM; d++)
440             {
441                 vn             = v[n][d];
442
443                 if((ptype[n] != eptVSite) && (ptype[n] != eptShell))
444                 {
445                     if(d == XX)
446                     {
447                         vc           = cosz*vcos;
448                         /* Do not scale the cosine velocity profile */
449                         vv           = vc + lg*(vn - vc + f[n][d]*w_dt);
450                         /* Add the cosine accelaration profile */
451                         vv          += dt*cosz*cos_accel;
452                     }
453                     else
454                     {
455                         vv           = lg*(vn + f[n][d]*w_dt);
456                     }
457                     v[n][d]        = vv;
458                     xprime[n][d]   = x[n][d]+vv*dt;
459                 }
460                 else
461                 {
462                     v[n][d]        = 0.0;
463                     xprime[n][d]   = x[n][d];
464                 }
465             }
466         }
467     }
468 }
469
470 static gmx_stochd_t *init_stochd(FILE *fplog,t_inputrec *ir)
471 {
472     gmx_stochd_t *sd;
473     gmx_sd_const_t *sdc;
474     int  ngtc,n;
475     real y;
476
477     snew(sd,1);
478
479     /* Initiate random number generator for langevin type dynamics,
480      * for BD, SD or velocity rescaling temperature coupling.
481      */
482     sd->gaussrand = gmx_rng_init(ir->ld_seed);
483
484     ngtc = ir->opts.ngtc;
485
486     if (ir->eI == eiBD)
487     {
488         snew(sd->bd_rf,ngtc);
489     }
490     else if (EI_SD(ir->eI))
491     {
492         snew(sd->sdc,ngtc);
493         snew(sd->sdsig,ngtc);
494
495         sdc = sd->sdc;
496         for(n=0; n<ngtc; n++)
497         {
498             if (ir->opts.tau_t[n] > 0)
499             {
500                 sdc[n].gdt = ir->delta_t/ir->opts.tau_t[n];
501                 sdc[n].eph = exp(sdc[n].gdt/2);
502                 sdc[n].emh = exp(-sdc[n].gdt/2);
503                 sdc[n].em  = exp(-sdc[n].gdt);
504             }
505             else
506             {
507                 /* No friction and noise on this group */
508                 sdc[n].gdt = 0;
509                 sdc[n].eph = 1;
510                 sdc[n].emh = 1;
511                 sdc[n].em  = 1;
512             }
513             if (sdc[n].gdt >= 0.05)
514             {
515                 sdc[n].b = sdc[n].gdt*(sdc[n].eph*sdc[n].eph - 1)
516                     - 4*(sdc[n].eph - 1)*(sdc[n].eph - 1);
517                 sdc[n].c = sdc[n].gdt - 3 + 4*sdc[n].emh - sdc[n].em;
518                 sdc[n].d = 2 - sdc[n].eph - sdc[n].emh;
519             }
520             else
521             {
522                 y = sdc[n].gdt/2;
523                 /* Seventh order expansions for small y */
524                 sdc[n].b = y*y*y*y*(1/3.0+y*(1/3.0+y*(17/90.0+y*7/9.0)));
525                 sdc[n].c = y*y*y*(2/3.0+y*(-1/2.0+y*(7/30.0+y*(-1/12.0+y*31/1260.0))));
526                 sdc[n].d = y*y*(-1+y*y*(-1/12.0-y*y/360.0));
527             }
528             if(debug)
529                 fprintf(debug,"SD const tc-grp %d: b %g  c %g  d %g\n",
530                         n,sdc[n].b,sdc[n].c,sdc[n].d);
531         }
532     }
533     else if (ETC_ANDERSEN(ir->etc))
534     {
535         int ngtc;
536         t_grpopts *opts;
537         real reft;
538
539         opts = &ir->opts;
540         ngtc = opts->ngtc;
541
542         snew(sd->randomize_group,ngtc);
543         snew(sd->boltzfac,ngtc);
544
545         /* for now, assume that all groups, if randomized, are randomized at the same rate, i.e. tau_t is the same. */
546         /* since constraint groups don't necessarily match up with temperature groups! This is checked in readir.c */
547
548         for (n=0;n<ngtc;n++) {
549             reft = max(0.0,opts->ref_t[n]);
550             if ((opts->tau_t[n] > 0) && (reft > 0))  /* tau_t or ref_t = 0 means that no randomization is done */
551             {
552                 sd->randomize_group[n] = TRUE;
553                 sd->boltzfac[n] = BOLTZ*opts->ref_t[n];
554             } else {
555                 sd->randomize_group[n] = FALSE;
556             }
557         }
558     }
559     return sd;
560 }
561
562 void get_stochd_state(gmx_update_t upd,t_state *state)
563 {
564     gmx_rng_get_state(upd->sd->gaussrand,state->ld_rng,state->ld_rngi);
565 }
566
567 void set_stochd_state(gmx_update_t upd,t_state *state)
568 {
569     gmx_rng_set_state(upd->sd->gaussrand,state->ld_rng,state->ld_rngi[0]);
570 }
571
572 gmx_update_t init_update(FILE *fplog,t_inputrec *ir)
573 {
574     t_gmx_update *upd;
575
576     snew(upd,1);
577
578     if (ir->eI == eiBD || EI_SD(ir->eI) || ir->etc == etcVRESCALE || ETC_ANDERSEN(ir->etc))
579     {
580         upd->sd = init_stochd(fplog,ir);
581     }
582
583     upd->xp = NULL;
584     upd->xp_nalloc = 0;
585     upd->randatom = NULL;
586     upd->randatom_list = NULL;
587     upd->randatom_list_init = FALSE; /* we have not yet cleared the data structure at this point */
588
589     return upd;
590 }
591
592 static void do_update_sd1(gmx_stochd_t *sd,
593                           int start,int homenr,double dt,
594                           rvec accel[],ivec nFreeze[],
595                           real invmass[],unsigned short ptype[],
596                           unsigned short cFREEZE[],unsigned short cACC[],
597                           unsigned short cTC[],
598                           rvec x[],rvec xprime[],rvec v[],rvec f[],
599                           rvec sd_X[],
600                           int ngtc,real tau_t[],real ref_t[])
601 {
602   gmx_sd_const_t *sdc;
603   gmx_sd_sigma_t *sig;
604   gmx_rng_t gaussrand;
605   real   kT;
606   int    gf=0,ga=0,gt=0;
607   real   ism,sd_V;
608   int    n,d;
609
610   sdc = sd->sdc;
611   sig = sd->sdsig;
612   if (homenr > sd->sd_V_nalloc)
613   {
614       sd->sd_V_nalloc = over_alloc_dd(homenr);
615       srenew(sd->sd_V,sd->sd_V_nalloc);
616   }
617   gaussrand = sd->gaussrand;
618
619   for(n=0; n<ngtc; n++)
620   {
621       kT = BOLTZ*ref_t[n];
622       /* The mass is encounted for later, since this differs per atom */
623       sig[n].V  = sqrt(kT*(1 - sdc[n].em*sdc[n].em));
624   }
625
626   for(n=start; n<start+homenr; n++)
627   {
628       ism = sqrt(invmass[n]);
629       if (cFREEZE)
630       {
631           gf  = cFREEZE[n];
632       }
633       if (cACC)
634       {
635           ga  = cACC[n];
636       }
637       if (cTC)
638       {
639           gt  = cTC[n];
640       }
641
642       for(d=0; d<DIM; d++)
643       {
644           if((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
645           {
646               sd_V = ism*sig[gt].V*gmx_rng_gaussian_table(gaussrand);
647
648               v[n][d] = v[n][d]*sdc[gt].em
649                   + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em)
650                   + sd_V;
651
652               xprime[n][d] = x[n][d] + v[n][d]*dt;
653           }
654           else
655           {
656               v[n][d]      = 0.0;
657               xprime[n][d] = x[n][d];
658           }
659       }
660   }
661 }
662
663 static void do_update_sd2(gmx_stochd_t *sd,gmx_bool bInitStep,
664                           int start,int homenr,
665                           rvec accel[],ivec nFreeze[],
666                           real invmass[],unsigned short ptype[],
667                           unsigned short cFREEZE[],unsigned short cACC[],
668                           unsigned short cTC[],
669                           rvec x[],rvec xprime[],rvec v[],rvec f[],
670                           rvec sd_X[],
671                           int ngtc,real tau_t[],real ref_t[],
672                           gmx_bool bFirstHalf)
673 {
674   gmx_sd_const_t *sdc;
675   gmx_sd_sigma_t *sig;
676   /* The random part of the velocity update, generated in the first
677    * half of the update, needs to be remembered for the second half.
678    */
679   rvec *sd_V;
680   gmx_rng_t gaussrand;
681   real   kT;
682   int    gf=0,ga=0,gt=0;
683   real   vn=0,Vmh,Xmh;
684   real   ism;
685   int    n,d;
686
687   sdc = sd->sdc;
688   sig = sd->sdsig;
689   if (homenr > sd->sd_V_nalloc)
690   {
691       sd->sd_V_nalloc = over_alloc_dd(homenr);
692       srenew(sd->sd_V,sd->sd_V_nalloc);
693   }
694   sd_V = sd->sd_V;
695   gaussrand = sd->gaussrand;
696
697   if (bFirstHalf)
698   {
699       for (n=0; n<ngtc; n++)
700       {
701           kT = BOLTZ*ref_t[n];
702           /* The mass is encounted for later, since this differs per atom */
703           sig[n].V  = sqrt(kT*(1-sdc[n].em));
704           sig[n].X  = sqrt(kT*sqr(tau_t[n])*sdc[n].c);
705           sig[n].Yv = sqrt(kT*sdc[n].b/sdc[n].c);
706           sig[n].Yx = sqrt(kT*sqr(tau_t[n])*sdc[n].b/(1-sdc[n].em));
707       }
708   }
709
710   for (n=start; n<start+homenr; n++)
711   {
712       ism = sqrt(invmass[n]);
713       if (cFREEZE)
714       {
715           gf  = cFREEZE[n];
716       }
717       if (cACC)
718       {
719           ga  = cACC[n];
720       }
721       if (cTC)
722       {
723           gt  = cTC[n];
724       }
725
726       for(d=0; d<DIM; d++)
727       {
728           if (bFirstHalf)
729           {
730               vn             = v[n][d];
731           }
732           if((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
733           {
734               if (bFirstHalf)
735               {
736                   if (bInitStep)
737                   {
738                       sd_X[n][d] = ism*sig[gt].X*gmx_rng_gaussian_table(gaussrand);
739                   }
740                   Vmh = sd_X[n][d]*sdc[gt].d/(tau_t[gt]*sdc[gt].c)
741                       + ism*sig[gt].Yv*gmx_rng_gaussian_table(gaussrand);
742                   sd_V[n-start][d] = ism*sig[gt].V*gmx_rng_gaussian_table(gaussrand);
743
744                   v[n][d] = vn*sdc[gt].em
745                       + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em)
746                       + sd_V[n-start][d] - sdc[gt].em*Vmh;
747
748                   xprime[n][d] = x[n][d] + v[n][d]*tau_t[gt]*(sdc[gt].eph - sdc[gt].emh);
749               }
750               else
751               {
752
753                   /* Correct the velocities for the constraints.
754                    * This operation introduces some inaccuracy,
755                    * since the velocity is determined from differences in coordinates.
756                    */
757                   v[n][d] =
758                       (xprime[n][d] - x[n][d])/(tau_t[gt]*(sdc[gt].eph - sdc[gt].emh));
759
760                   Xmh = sd_V[n-start][d]*tau_t[gt]*sdc[gt].d/(sdc[gt].em-1)
761                       + ism*sig[gt].Yx*gmx_rng_gaussian_table(gaussrand);
762                   sd_X[n][d] = ism*sig[gt].X*gmx_rng_gaussian_table(gaussrand);
763
764                   xprime[n][d] += sd_X[n][d] - Xmh;
765
766               }
767           }
768           else
769           {
770               if (bFirstHalf)
771               {
772                   v[n][d]        = 0.0;
773                   xprime[n][d]   = x[n][d];
774               }
775           }
776       }
777   }
778 }
779
780 static void do_update_bd(int start,int nrend,double dt,
781                          ivec nFreeze[],
782                          real invmass[],unsigned short ptype[],
783                          unsigned short cFREEZE[],unsigned short cTC[],
784                          rvec x[],rvec xprime[],rvec v[],
785                          rvec f[],real friction_coefficient,
786                          int ngtc,real tau_t[],real ref_t[],
787                          real *rf,gmx_rng_t gaussrand)
788 {
789     /* note -- these appear to be full step velocities . . .  */
790     int    gf=0,gt=0;
791     real   vn;
792     real   invfr=0;
793     int    n,d;
794
795     if (friction_coefficient != 0)
796     {
797         invfr = 1.0/friction_coefficient;
798         for(n=0; n<ngtc; n++)
799         {
800             rf[n] = sqrt(2.0*BOLTZ*ref_t[n]/(friction_coefficient*dt));
801         }
802     }
803     else
804     {
805         for(n=0; n<ngtc; n++)
806         {
807             rf[n] = sqrt(2.0*BOLTZ*ref_t[n]);
808         }
809     }
810     for(n=start; (n<nrend); n++)
811     {
812         if (cFREEZE)
813         {
814             gf = cFREEZE[n];
815         }
816         if (cTC)
817         {
818             gt = cTC[n];
819         }
820         for(d=0; (d<DIM); d++)
821         {
822             if((ptype[n]!=eptVSite) && (ptype[n]!=eptShell) && !nFreeze[gf][d])
823             {
824                 if (friction_coefficient != 0) {
825                     vn = invfr*f[n][d] + rf[gt]*gmx_rng_gaussian_table(gaussrand);
826                 }
827                 else
828                 {
829                     /* NOTE: invmass = 2/(mass*friction_constant*dt) */
830                     vn = 0.5*invmass[n]*f[n][d]*dt 
831                         + sqrt(0.5*invmass[n])*rf[gt]*gmx_rng_gaussian_table(gaussrand);
832                 }
833
834                 v[n][d]      = vn;
835                 xprime[n][d] = x[n][d]+vn*dt;
836             }
837             else
838             {
839                 v[n][d]      = 0.0;
840                 xprime[n][d] = x[n][d];
841             }
842         }
843     }
844 }
845
846 static void dump_it_all(FILE *fp,const char *title,
847                         int natoms,rvec x[],rvec xp[],rvec v[],rvec f[])
848 {
849 #ifdef DEBUG
850   if (fp)
851   {
852     fprintf(fp,"%s\n",title);
853     pr_rvecs(fp,0,"x",x,natoms);
854     pr_rvecs(fp,0,"xp",xp,natoms);
855     pr_rvecs(fp,0,"v",v,natoms);
856     pr_rvecs(fp,0,"f",f,natoms);
857   }
858 #endif
859 }
860
861 static void calc_ke_part_normal(rvec v[], t_grpopts *opts,t_mdatoms *md,
862                                 gmx_ekindata_t *ekind,t_nrnb *nrnb,gmx_bool bEkinAveVel,
863                                 gmx_bool bSaveEkinOld)
864 {
865   int          g;
866   t_grp_tcstat *tcstat=ekind->tcstat;
867   t_grp_acc    *grpstat=ekind->grpstat;
868   int          nthread,thread;
869
870   /* three main: VV with AveVel, vv with AveEkin, leap with AveEkin.  Leap with AveVel is also
871      an option, but not supported now.  Additionally, if we are doing iterations.
872      bEkinAveVel: If TRUE, we sum into ekin, if FALSE, into ekinh.
873      bSavEkinOld: If TRUE (in the case of iteration = bIterate is TRUE), we don't copy over the ekinh_old.
874      If FALSE, we overrwrite it.
875   */
876
877   /* group velocities are calculated in update_ekindata and
878    * accumulated in acumulate_groups.
879    * Now the partial global and groups ekin.
880    */
881   for(g=0; (g<opts->ngtc); g++)
882   {
883
884       if (!bSaveEkinOld) {
885           copy_mat(tcstat[g].ekinh,tcstat[g].ekinh_old);
886       }
887       if(bEkinAveVel) {
888           clear_mat(tcstat[g].ekinf);
889       } else {
890           clear_mat(tcstat[g].ekinh);
891       }
892       if (bEkinAveVel) {
893           tcstat[g].ekinscalef_nhc = 1.0;   /* need to clear this -- logic is complicated! */
894       }
895   }
896   ekind->dekindl_old = ekind->dekindl;
897   
898   nthread = gmx_omp_nthreads_get(emntUpdate);
899
900 #pragma omp parallel for num_threads(nthread) schedule(static)
901     for(thread=0; thread<nthread; thread++)
902     {
903         int  start_t,end_t,n;
904         int  ga,gt;
905         rvec v_corrt;
906         real hm;
907         int  d,m;
908         matrix *ekin_sum;
909         real   *dekindl_sum;
910
911         start_t = md->start + ((thread+0)*md->homenr)/nthread;
912         end_t   = md->start + ((thread+1)*md->homenr)/nthread;
913
914         ekin_sum    = ekind->ekin_work[thread];
915         dekindl_sum = &ekind->ekin_work[thread][opts->ngtc][0][0];
916
917         for(gt=0; gt<opts->ngtc; gt++)
918         {
919             clear_mat(ekin_sum[gt]);
920         }
921
922         ga = 0;
923         gt = 0;
924         for(n=start_t; n<end_t; n++) 
925         {
926             if (md->cACC)
927             {
928                 ga = md->cACC[n];
929             }
930             if (md->cTC)
931             {
932                 gt = md->cTC[n];
933             }
934             hm   = 0.5*md->massT[n];
935             
936             for(d=0; (d<DIM); d++) 
937             {
938                 v_corrt[d]  = v[n][d]  - grpstat[ga].u[d];
939             }
940             for(d=0; (d<DIM); d++) 
941             {
942                 for (m=0;(m<DIM); m++) 
943                 {
944                     /* if we're computing a full step velocity, v_corrt[d] has v(t).  Otherwise, v(t+dt/2) */
945                     ekin_sum[gt][m][d] += hm*v_corrt[m]*v_corrt[d];
946                 }
947             }
948             if (md->nMassPerturbed && md->bPerturbed[n]) 
949             {
950                 *dekindl_sum -=
951                     0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt,v_corrt);
952             }
953         }
954     }
955
956     ekind->dekindl = 0;
957     for(thread=0; thread<nthread; thread++)
958     {
959         for(g=0; g<opts->ngtc; g++)
960         {
961             if (bEkinAveVel) 
962             {
963                 m_add(tcstat[g].ekinf,ekind->ekin_work[thread][g],
964                       tcstat[g].ekinf);
965             }
966             else
967             {
968                 m_add(tcstat[g].ekinh,ekind->ekin_work[thread][g],
969                       tcstat[g].ekinh);
970             }
971         }
972
973         ekind->dekindl += ekind->ekin_work[thread][opts->ngtc][0][0];
974     }
975
976     inc_nrnb(nrnb,eNR_EKIN,md->homenr);
977 }
978
979 static void calc_ke_part_visc(matrix box,rvec x[],rvec v[],
980                               t_grpopts *opts,t_mdatoms *md,
981                               gmx_ekindata_t *ekind,
982                               t_nrnb *nrnb, gmx_bool bEkinAveVel, gmx_bool bSaveEkinOld)
983 {
984   int          start=md->start,homenr=md->homenr;
985   int          g,d,n,m,gt=0;
986   rvec         v_corrt;
987   real         hm;
988   t_grp_tcstat *tcstat=ekind->tcstat;
989   t_cos_acc    *cosacc=&(ekind->cosacc);
990   real         dekindl;
991   real         fac,cosz;
992   double       mvcos;
993
994   for(g=0; g<opts->ngtc; g++)
995   {
996       copy_mat(ekind->tcstat[g].ekinh,ekind->tcstat[g].ekinh_old);
997       clear_mat(ekind->tcstat[g].ekinh);
998   }
999   ekind->dekindl_old = ekind->dekindl;
1000
1001   fac = 2*M_PI/box[ZZ][ZZ];
1002   mvcos = 0;
1003   dekindl = 0;
1004   for(n=start; n<start+homenr; n++)
1005   {
1006       if (md->cTC)
1007       {
1008           gt = md->cTC[n];
1009       }
1010       hm   = 0.5*md->massT[n];
1011
1012       /* Note that the times of x and v differ by half a step */
1013       /* MRS -- would have to be changed for VV */
1014       cosz         = cos(fac*x[n][ZZ]);
1015       /* Calculate the amplitude of the new velocity profile */
1016       mvcos       += 2*cosz*md->massT[n]*v[n][XX];
1017
1018       copy_rvec(v[n],v_corrt);
1019       /* Subtract the profile for the kinetic energy */
1020       v_corrt[XX] -= cosz*cosacc->vcos;
1021       for (d=0; (d<DIM); d++)
1022       {
1023           for (m=0; (m<DIM); m++)
1024           {
1025               /* if we're computing a full step velocity, v_corrt[d] has v(t).  Otherwise, v(t+dt/2) */
1026               if (bEkinAveVel)
1027               {
1028                   tcstat[gt].ekinf[m][d]+=hm*v_corrt[m]*v_corrt[d];
1029               }
1030               else
1031               {
1032                   tcstat[gt].ekinh[m][d]+=hm*v_corrt[m]*v_corrt[d];
1033               }
1034           }
1035       }
1036       if(md->nPerturbed && md->bPerturbed[n])
1037       {
1038           dekindl -= 0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt,v_corrt);
1039       }
1040   }
1041   ekind->dekindl = dekindl;
1042   cosacc->mvcos = mvcos;
1043
1044   inc_nrnb(nrnb,eNR_EKIN,homenr);
1045 }
1046
1047 void calc_ke_part(t_state *state,t_grpopts *opts,t_mdatoms *md,
1048                   gmx_ekindata_t *ekind,t_nrnb *nrnb, gmx_bool bEkinAveVel, gmx_bool bSaveEkinOld)
1049 {
1050     if (ekind->cosacc.cos_accel == 0)
1051     {
1052         calc_ke_part_normal(state->v,opts,md,ekind,nrnb,bEkinAveVel,bSaveEkinOld);
1053     }
1054     else
1055     {
1056         calc_ke_part_visc(state->box,state->x,state->v,opts,md,ekind,nrnb,bEkinAveVel,bSaveEkinOld);
1057     }
1058 }
1059
1060 extern void init_ekinstate(ekinstate_t *ekinstate,const t_inputrec *ir)
1061 {
1062     ekinstate->ekin_n = ir->opts.ngtc;
1063     snew(ekinstate->ekinh,ekinstate->ekin_n);
1064     snew(ekinstate->ekinf,ekinstate->ekin_n);
1065     snew(ekinstate->ekinh_old,ekinstate->ekin_n);
1066     snew(ekinstate->ekinscalef_nhc,ekinstate->ekin_n);
1067     snew(ekinstate->ekinscaleh_nhc,ekinstate->ekin_n);
1068     snew(ekinstate->vscale_nhc,ekinstate->ekin_n);
1069     ekinstate->dekindl = 0;
1070     ekinstate->mvcos   = 0;
1071 }
1072
1073 void update_ekinstate(ekinstate_t *ekinstate,gmx_ekindata_t *ekind)
1074 {
1075   int i;
1076
1077   for(i=0;i<ekinstate->ekin_n;i++)
1078   {
1079       copy_mat(ekind->tcstat[i].ekinh,ekinstate->ekinh[i]);
1080       copy_mat(ekind->tcstat[i].ekinf,ekinstate->ekinf[i]);
1081       copy_mat(ekind->tcstat[i].ekinh_old,ekinstate->ekinh_old[i]);
1082       ekinstate->ekinscalef_nhc[i] = ekind->tcstat[i].ekinscalef_nhc;
1083       ekinstate->ekinscaleh_nhc[i] = ekind->tcstat[i].ekinscaleh_nhc;
1084       ekinstate->vscale_nhc[i] = ekind->tcstat[i].vscale_nhc;
1085   }
1086
1087   copy_mat(ekind->ekin,ekinstate->ekin_total);
1088   ekinstate->dekindl = ekind->dekindl;
1089   ekinstate->mvcos = ekind->cosacc.mvcos;
1090
1091 }
1092
1093 void restore_ekinstate_from_state(t_commrec *cr,
1094                                   gmx_ekindata_t *ekind,ekinstate_t *ekinstate)
1095 {
1096   int i,n;
1097
1098   if (MASTER(cr))
1099   {
1100       for(i=0;i<ekinstate->ekin_n;i++)
1101       {
1102           copy_mat(ekinstate->ekinh[i],ekind->tcstat[i].ekinh);
1103           copy_mat(ekinstate->ekinf[i],ekind->tcstat[i].ekinf);
1104           copy_mat(ekinstate->ekinh_old[i],ekind->tcstat[i].ekinh_old);
1105           ekind->tcstat[i].ekinscalef_nhc = ekinstate->ekinscalef_nhc[i];
1106           ekind->tcstat[i].ekinscaleh_nhc = ekinstate->ekinscaleh_nhc[i];
1107           ekind->tcstat[i].vscale_nhc = ekinstate->vscale_nhc[i];
1108       }
1109
1110       copy_mat(ekinstate->ekin_total,ekind->ekin);
1111
1112       ekind->dekindl = ekinstate->dekindl;
1113       ekind->cosacc.mvcos = ekinstate->mvcos;
1114       n = ekinstate->ekin_n;
1115   }
1116
1117   if (PAR(cr))
1118   {
1119       gmx_bcast(sizeof(n),&n,cr);
1120       for(i=0;i<n;i++)
1121       {
1122           gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinh[0][0]),
1123                     ekind->tcstat[i].ekinh[0],cr);
1124           gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinf[0][0]),
1125                     ekind->tcstat[i].ekinf[0],cr);
1126           gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinh_old[0][0]),
1127                     ekind->tcstat[i].ekinh_old[0],cr);
1128
1129           gmx_bcast(sizeof(ekind->tcstat[i].ekinscalef_nhc),
1130                     &(ekind->tcstat[i].ekinscalef_nhc),cr);
1131           gmx_bcast(sizeof(ekind->tcstat[i].ekinscaleh_nhc),
1132                     &(ekind->tcstat[i].ekinscaleh_nhc),cr);
1133           gmx_bcast(sizeof(ekind->tcstat[i].vscale_nhc),
1134                     &(ekind->tcstat[i].vscale_nhc),cr);
1135       }
1136       gmx_bcast(DIM*DIM*sizeof(ekind->ekin[0][0]),
1137                 ekind->ekin[0],cr);
1138
1139       gmx_bcast(sizeof(ekind->dekindl),&ekind->dekindl,cr);
1140       gmx_bcast(sizeof(ekind->cosacc.mvcos),&ekind->cosacc.mvcos,cr);
1141   }
1142 }
1143
1144 void set_deform_reference_box(gmx_update_t upd,gmx_large_int_t step,matrix box)
1145 {
1146     upd->deformref_step = step;
1147     copy_mat(box,upd->deformref_box);
1148 }
1149
1150 static void deform(gmx_update_t upd,
1151                    int start,int homenr,rvec x[],matrix box,matrix *scale_tot,
1152                    const t_inputrec *ir,gmx_large_int_t step)
1153 {
1154     matrix bnew,invbox,mu;
1155     real   elapsed_time;
1156     int    i,j;
1157
1158     elapsed_time = (step + 1 - upd->deformref_step)*ir->delta_t;
1159     copy_mat(box,bnew);
1160     for(i=0; i<DIM; i++)
1161     {
1162         for(j=0; j<DIM; j++)
1163         {
1164             if (ir->deform[i][j] != 0)
1165             {
1166                 bnew[i][j] =
1167                     upd->deformref_box[i][j] + elapsed_time*ir->deform[i][j];
1168             }
1169         }
1170     }
1171     /* We correct the off-diagonal elements,
1172      * which can grow indefinitely during shearing,
1173      * so the shifts do not get messed up.
1174      */
1175     for(i=1; i<DIM; i++)
1176     {
1177         for(j=i-1; j>=0; j--)
1178         {
1179             while (bnew[i][j] - box[i][j] > 0.5*bnew[j][j])
1180             {
1181                 rvec_dec(bnew[i],bnew[j]);
1182             }
1183             while (bnew[i][j] - box[i][j] < -0.5*bnew[j][j])
1184             {
1185                 rvec_inc(bnew[i],bnew[j]);
1186             }
1187         }
1188     }
1189     m_inv_ur0(box,invbox);
1190     copy_mat(bnew,box);
1191     mmul_ur0(box,invbox,mu);
1192
1193     for(i=start; i<start+homenr; i++)
1194     {
1195         x[i][XX] = mu[XX][XX]*x[i][XX]+mu[YY][XX]*x[i][YY]+mu[ZZ][XX]*x[i][ZZ];
1196         x[i][YY] = mu[YY][YY]*x[i][YY]+mu[ZZ][YY]*x[i][ZZ];
1197         x[i][ZZ] = mu[ZZ][ZZ]*x[i][ZZ];
1198     }
1199     if (*scale_tot)
1200     {
1201         /* The transposes of the scaling matrices are stored,
1202          * so we need to do matrix multiplication in the inverse order.
1203          */
1204         mmul_ur0(*scale_tot,mu,*scale_tot);
1205     }
1206 }
1207
1208 static void combine_forces(int nstlist,
1209                            gmx_constr_t constr,
1210                            t_inputrec *ir,t_mdatoms *md,t_idef *idef,
1211                            t_commrec *cr,
1212                            gmx_large_int_t step,
1213                            t_state *state,gmx_bool bMolPBC,
1214                            int start,int nrend,
1215                            rvec f[],rvec f_lr[],
1216                            t_nrnb *nrnb)
1217 {
1218     int  i,d,nm1;
1219
1220     /* f contains the short-range forces + the long range forces
1221      * which are stored separately in f_lr.
1222      */
1223
1224     if (constr != NULL && !(ir->eConstrAlg == econtSHAKE && ir->epc == epcNO))
1225     {
1226         /* We need to constrain the LR forces separately,
1227          * because due to the different pre-factor for the SR and LR
1228          * forces in the update algorithm, we can not determine
1229          * the constraint force for the coordinate constraining.
1230          * Constrain only the additional LR part of the force.
1231          */
1232         /* MRS -- need to make sure this works with trotter integration -- the constraint calls may not be right.*/
1233         constrain(NULL,FALSE,FALSE,constr,idef,ir,NULL,cr,step,0,md,
1234                   state->x,f_lr,f_lr,bMolPBC,state->box,state->lambda[efptBONDED],NULL,
1235                   NULL,NULL,nrnb,econqForce,ir->epc==epcMTTK,state->veta,state->veta);
1236     }
1237
1238     /* Add nstlist-1 times the LR force to the sum of both forces
1239      * and store the result in forces_lr.
1240      */
1241     nm1 = nstlist - 1;
1242     for(i=start; i<nrend; i++)
1243     {
1244         for(d=0; d<DIM; d++)
1245         {
1246             f_lr[i][d] = f[i][d] + nm1*f_lr[i][d];
1247         }
1248     }
1249 }
1250
1251 void update_tcouple(FILE         *fplog,
1252                     gmx_large_int_t   step,
1253                     t_inputrec   *inputrec,
1254                     t_state      *state,
1255                     gmx_ekindata_t *ekind,
1256                     gmx_wallcycle_t wcycle,
1257                     gmx_update_t upd,
1258                     t_extmass    *MassQ,
1259                     t_mdatoms  *md)
1260
1261 {
1262     gmx_bool   bTCouple=FALSE;
1263     real   dttc;
1264     int    i,start,end,homenr,offset;
1265
1266     /* if using vv with trotter decomposition methods, we do this elsewhere in the code */
1267     if (inputrec->etc != etcNO &&
1268         !(IR_NVT_TROTTER(inputrec) || IR_NPT_TROTTER(inputrec) || IR_NPH_TROTTER(inputrec)))
1269     {
1270         /* We should only couple after a step where energies were determined (for leapfrog versions)
1271          or the step energies are determined, for velocity verlet versions */
1272
1273         if (EI_VV(inputrec->eI)) {
1274             offset = 0;
1275         } else {
1276             offset = 1;
1277         }
1278         bTCouple = (inputrec->nsttcouple == 1 ||
1279                     do_per_step(step+inputrec->nsttcouple-offset,
1280                                 inputrec->nsttcouple));
1281     }
1282
1283     if (bTCouple)
1284     {
1285         dttc = inputrec->nsttcouple*inputrec->delta_t;
1286
1287         switch (inputrec->etc)
1288         {
1289         case etcNO:
1290             break;
1291         case etcBERENDSEN:
1292             berendsen_tcoupl(inputrec,ekind,dttc);
1293             break;
1294         case etcNOSEHOOVER:
1295             nosehoover_tcoupl(&(inputrec->opts),ekind,dttc,
1296                               state->nosehoover_xi,state->nosehoover_vxi,MassQ);
1297             break;
1298         case etcVRESCALE:
1299             vrescale_tcoupl(inputrec,ekind,dttc,
1300                             state->therm_integral,upd->sd->gaussrand);
1301             break;
1302         }
1303         /* rescale in place here */
1304         if (EI_VV(inputrec->eI))
1305         {
1306             rescale_velocities(ekind,md,md->start,md->start+md->homenr,state->v);
1307         }
1308     }
1309     else
1310     {
1311         /* Set the T scaling lambda to 1 to have no scaling */
1312         for(i=0; (i<inputrec->opts.ngtc); i++)
1313         {
1314             ekind->tcstat[i].lambda = 1.0;
1315         }
1316     }
1317 }
1318
1319 void update_pcouple(FILE         *fplog,
1320                     gmx_large_int_t   step,
1321                     t_inputrec   *inputrec,
1322                     t_state      *state,
1323                     matrix       pcoupl_mu,
1324                     matrix       M,
1325                     gmx_wallcycle_t wcycle,
1326                     gmx_update_t upd,
1327                     gmx_bool         bInitStep)
1328 {
1329     gmx_bool   bPCouple=FALSE;
1330     real   dtpc=0;
1331     int    i;
1332
1333     /* if using Trotter pressure, we do this in coupling.c, so we leave it false. */
1334     if (inputrec->epc != epcNO && (!(IR_NPT_TROTTER(inputrec) || IR_NPH_TROTTER(inputrec))))
1335     {
1336         /* We should only couple after a step where energies were determined */
1337         bPCouple = (inputrec->nstpcouple == 1 ||
1338                     do_per_step(step+inputrec->nstpcouple-1,
1339                                 inputrec->nstpcouple));
1340     }
1341
1342     clear_mat(pcoupl_mu);
1343     for(i=0; i<DIM; i++)
1344     {
1345         pcoupl_mu[i][i] = 1.0;
1346     }
1347
1348     clear_mat(M);
1349
1350     if (bPCouple)
1351     {
1352         dtpc = inputrec->nstpcouple*inputrec->delta_t;
1353
1354         switch (inputrec->epc)
1355         {
1356             /* We can always pcoupl, even if we did not sum the energies
1357              * the previous step, since state->pres_prev is only updated
1358              * when the energies have been summed.
1359              */
1360         case (epcNO):
1361             break;
1362         case (epcBERENDSEN):
1363             if (!bInitStep)
1364             {
1365                 berendsen_pcoupl(fplog,step,inputrec,dtpc,state->pres_prev,state->box,
1366                                  pcoupl_mu);
1367             }
1368             break;
1369         case (epcPARRINELLORAHMAN):
1370             parrinellorahman_pcoupl(fplog,step,inputrec,dtpc,state->pres_prev,
1371                                     state->box,state->box_rel,state->boxv,
1372                                     M,pcoupl_mu,bInitStep);
1373             break;
1374         default:
1375             break;
1376         }
1377     }
1378 }
1379
1380 static rvec *get_xprime(const t_state *state,gmx_update_t upd)
1381 {
1382     if (state->nalloc > upd->xp_nalloc)
1383     {
1384         upd->xp_nalloc = state->nalloc;
1385         srenew(upd->xp,upd->xp_nalloc);
1386     }
1387
1388     return upd->xp;
1389 }
1390
1391 void update_constraints(FILE         *fplog,
1392                         gmx_large_int_t   step,
1393                         real         *dvdlambda,    /* the contribution to be added to the bonded interactions */
1394                         t_inputrec   *inputrec,      /* input record and box stuff      */
1395                         gmx_ekindata_t *ekind,
1396                         t_mdatoms    *md,
1397                         t_state      *state,
1398                         gmx_bool     bMolPBC,
1399                         t_graph      *graph,
1400                         rvec         force[],        /* forces on home particles */
1401                         t_idef       *idef,
1402                         tensor       vir_part,
1403                         tensor       vir,            /* tensors for virial and ekin, needed for computing */
1404                         t_commrec    *cr,
1405                         t_nrnb       *nrnb,
1406                         gmx_wallcycle_t wcycle,
1407                         gmx_update_t upd,
1408                         gmx_constr_t constr,
1409                         gmx_bool         bInitStep,
1410                         gmx_bool         bFirstHalf,
1411                         gmx_bool         bCalcVir,
1412                         real         vetanew)
1413 {
1414     gmx_bool             bExtended,bLastStep,bLog=FALSE,bEner=FALSE,bDoConstr=FALSE;
1415     double           dt;
1416     real             dt_1;
1417     int              start,homenr,nrend,i,n,m,g,d;
1418     tensor           vir_con;
1419     rvec             *vbuf,*xprime=NULL;
1420
1421     if (constr) {bDoConstr=TRUE;}
1422     if (bFirstHalf && !EI_VV(inputrec->eI)) {bDoConstr=FALSE;}
1423
1424     /* for now, SD update is here -- though it really seems like it
1425        should be reformulated as a velocity verlet method, since it has two parts */
1426
1427     start  = md->start;
1428     homenr = md->homenr;
1429     nrend = start+homenr;
1430
1431     dt   = inputrec->delta_t;
1432     dt_1 = 1.0/dt;
1433
1434     /*
1435      *  Steps (7C, 8C)
1436      *  APPLY CONSTRAINTS:
1437      *  BLOCK SHAKE
1438
1439      * When doing PR pressure coupling we have to constrain the
1440      * bonds in each iteration. If we are only using Nose-Hoover tcoupling
1441      * it is enough to do this once though, since the relative velocities
1442      * after this will be normal to the bond vector
1443      */
1444
1445     if (bDoConstr)
1446     {
1447         /* clear out constraints before applying */
1448         clear_mat(vir_part);
1449
1450         xprime = get_xprime(state,upd);
1451
1452         bLastStep = (step == inputrec->init_step+inputrec->nsteps);
1453         bLog  = (do_per_step(step,inputrec->nstlog) || bLastStep || (step < 0));
1454         bEner = (do_per_step(step,inputrec->nstenergy) || bLastStep);
1455         /* Constrain the coordinates xprime */
1456         wallcycle_start(wcycle,ewcCONSTR);
1457         if (EI_VV(inputrec->eI) && bFirstHalf)
1458         {
1459             constrain(NULL,bLog,bEner,constr,idef,
1460                       inputrec,ekind,cr,step,1,md,
1461                       state->x,state->v,state->v,
1462                       bMolPBC,state->box,
1463                       state->lambda[efptBONDED],dvdlambda,
1464                       NULL,bCalcVir ? &vir_con : NULL,nrnb,econqVeloc,
1465                       inputrec->epc==epcMTTK,state->veta,vetanew);
1466         }
1467         else
1468         {
1469             constrain(NULL,bLog,bEner,constr,idef,
1470                       inputrec,ekind,cr,step,1,md,
1471                       state->x,xprime,NULL,
1472                       bMolPBC,state->box,
1473                       state->lambda[efptBONDED],dvdlambda,
1474                       state->v,bCalcVir ? &vir_con : NULL ,nrnb,econqCoord,
1475                       inputrec->epc==epcMTTK,state->veta,state->veta);
1476         }
1477         wallcycle_stop(wcycle,ewcCONSTR);
1478
1479         where();
1480
1481         dump_it_all(fplog,"After Shake",
1482                     state->natoms,state->x,xprime,state->v,force);
1483
1484         if (bCalcVir)
1485         {
1486             if (inputrec->eI == eiSD2)
1487             {
1488                 /* A correction factor eph is needed for the SD constraint force */
1489                 /* Here we can, unfortunately, not have proper corrections
1490                  * for different friction constants, so we use the first one.
1491                  */
1492                 for(i=0; i<DIM; i++)
1493                 {
1494                     for(m=0; m<DIM; m++)
1495                     {
1496                         vir_part[i][m] += upd->sd->sdc[0].eph*vir_con[i][m];
1497                     }
1498                 }
1499             }
1500             else
1501             {
1502                 m_add(vir_part,vir_con,vir_part);
1503             }
1504             if (debug)
1505             {
1506                 pr_rvecs(debug,0,"constraint virial",vir_part,DIM);
1507             }
1508         }
1509     }
1510
1511     where();
1512     if ((inputrec->eI == eiSD2) && !(bFirstHalf))
1513     {
1514         xprime = get_xprime(state,upd);
1515
1516         /* The second part of the SD integration */
1517         do_update_sd2(upd->sd,FALSE,start,homenr,
1518                       inputrec->opts.acc,inputrec->opts.nFreeze,
1519                       md->invmass,md->ptype,
1520                       md->cFREEZE,md->cACC,md->cTC,
1521                       state->x,xprime,state->v,force,state->sd_X,
1522                       inputrec->opts.ngtc,inputrec->opts.tau_t,
1523                       inputrec->opts.ref_t,FALSE);
1524         inc_nrnb(nrnb, eNR_UPDATE, homenr);
1525
1526         if (bDoConstr)
1527         {
1528             /* Constrain the coordinates xprime */
1529             wallcycle_start(wcycle,ewcCONSTR);
1530             constrain(NULL,bLog,bEner,constr,idef,
1531                       inputrec,NULL,cr,step,1,md,
1532                       state->x,xprime,NULL,
1533                       bMolPBC,state->box,
1534                       state->lambda[efptBONDED],dvdlambda,
1535                       NULL,NULL,nrnb,econqCoord,FALSE,0,0);
1536             wallcycle_stop(wcycle,ewcCONSTR);
1537         }
1538     }
1539
1540     /* We must always unshift after updating coordinates; if we did not shake
1541        x was shifted in do_force */
1542
1543     if (!(bFirstHalf)) /* in the first half of vv, no shift. */
1544     {
1545         if (graph && (graph->nnodes > 0))
1546         {
1547             unshift_x(graph,state->box,state->x,upd->xp);
1548             if (TRICLINIC(state->box))
1549             {
1550                 inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
1551             }
1552             else
1553             {
1554                 inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
1555             }
1556         }
1557         else
1558         {
1559 #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntUpdate)) schedule(static)
1560             for(i=start; i<nrend; i++)
1561             {
1562                 copy_rvec(upd->xp[i],state->x[i]);
1563             }
1564         }
1565
1566         dump_it_all(fplog,"After unshift",
1567                     state->natoms,state->x,upd->xp,state->v,force);
1568     }
1569 /* ############# END the update of velocities and positions ######### */
1570 }
1571
1572 void update_box(FILE         *fplog,
1573                 gmx_large_int_t   step,
1574                 t_inputrec   *inputrec,      /* input record and box stuff      */
1575                 t_mdatoms    *md,
1576                 t_state      *state,
1577                 t_graph      *graph,
1578                 rvec         force[],        /* forces on home particles */
1579                 matrix       *scale_tot,
1580                 matrix       pcoupl_mu,
1581                 t_nrnb       *nrnb,
1582                 gmx_wallcycle_t wcycle,
1583                 gmx_update_t upd,
1584                 gmx_bool         bInitStep,
1585                 gmx_bool         bFirstHalf)
1586 {
1587     gmx_bool             bExtended,bLastStep,bLog=FALSE,bEner=FALSE;
1588     double           dt;
1589     real             dt_1;
1590     int              start,homenr,nrend,i,n,m,g;
1591     tensor           vir_con;
1592
1593     start  = md->start;
1594     homenr = md->homenr;
1595     nrend = start+homenr;
1596
1597     bExtended =
1598         (inputrec->etc == etcNOSEHOOVER) ||
1599         (inputrec->epc == epcPARRINELLORAHMAN) ||
1600         (inputrec->epc == epcMTTK);
1601
1602     dt = inputrec->delta_t;
1603
1604     where();
1605
1606     /* now update boxes */
1607     switch (inputrec->epc) {
1608     case (epcNO):
1609         break;
1610     case (epcBERENDSEN):
1611         berendsen_pscale(inputrec,pcoupl_mu,state->box,state->box_rel,
1612                          start,homenr,state->x,md->cFREEZE,nrnb);
1613         break;
1614     case (epcPARRINELLORAHMAN):
1615         /* The box velocities were updated in do_pr_pcoupl in the update
1616          * iteration, but we dont change the box vectors until we get here
1617          * since we need to be able to shift/unshift above.
1618          */
1619         for(i=0;i<DIM;i++)
1620         {
1621             for(m=0;m<=i;m++)
1622             {
1623                 state->box[i][m] += dt*state->boxv[i][m];
1624             }
1625         }
1626         preserve_box_shape(inputrec,state->box_rel,state->box);
1627
1628         /* Scale the coordinates */
1629         for(n=start; (n<start+homenr); n++)
1630         {
1631             tmvmul_ur0(pcoupl_mu,state->x[n],state->x[n]);
1632         }
1633         break;
1634     case (epcMTTK):
1635         switch (inputrec->epct)
1636         {
1637         case (epctISOTROPIC):
1638             /* DIM * eta = ln V.  so DIM*eta_new = DIM*eta_old + DIM*dt*veta =>
1639                ln V_new = ln V_old + 3*dt*veta => V_new = V_old*exp(3*dt*veta) =>
1640                Side length scales as exp(veta*dt) */
1641
1642             msmul(state->box,exp(state->veta*dt),state->box);
1643
1644             /* Relate veta to boxv.  veta = d(eta)/dT = (1/DIM)*1/V dV/dT.
1645 o               If we assume isotropic scaling, and box length scaling
1646                factor L, then V = L^DIM (det(M)).  So dV/dt = DIM
1647                L^(DIM-1) dL/dt det(M), and veta = (1/L) dL/dt.  The
1648                determinant of B is L^DIM det(M), and the determinant
1649                of dB/dt is (dL/dT)^DIM det (M).  veta will be
1650                (det(dB/dT)/det(B))^(1/3).  Then since M =
1651                B_new*(vol_new)^(1/3), dB/dT_new = (veta_new)*B(new). */
1652
1653             msmul(state->box,state->veta,state->boxv);
1654             break;
1655         default:
1656             break;
1657         }
1658         break;
1659     default:
1660         break;
1661     }
1662
1663     if ((!(IR_NPT_TROTTER(inputrec) || IR_NPH_TROTTER(inputrec))) && scale_tot)
1664     {
1665         /* The transposes of the scaling matrices are stored,
1666          * therefore we need to reverse the order in the multiplication.
1667          */
1668         mmul_ur0(*scale_tot,pcoupl_mu,*scale_tot);
1669     }
1670
1671     if (DEFORM(*inputrec))
1672     {
1673         deform(upd,start,homenr,state->x,state->box,scale_tot,inputrec,step);
1674     }
1675     where();
1676     dump_it_all(fplog,"After update",
1677                 state->natoms,state->x,upd->xp,state->v,force);
1678 }
1679
1680 void update_coords(FILE         *fplog,
1681                    gmx_large_int_t   step,
1682                    t_inputrec   *inputrec,      /* input record and box stuff   */
1683                    t_mdatoms    *md,
1684                    t_state      *state,
1685                    gmx_bool     bMolPBC,
1686                    rvec         *f,        /* forces on home particles */
1687                    gmx_bool         bDoLR,
1688                    rvec         *f_lr,
1689                    t_fcdata     *fcd,
1690                    gmx_ekindata_t *ekind,
1691                    matrix       M,
1692                    gmx_wallcycle_t wcycle,
1693                    gmx_update_t upd,
1694                    gmx_bool         bInitStep,
1695                    int          UpdatePart,
1696                    t_commrec    *cr,  /* these shouldn't be here -- need to think about it */
1697                    t_nrnb       *nrnb,
1698                    gmx_constr_t constr,
1699                    t_idef       *idef)
1700 {
1701     gmx_bool         bNH,bPR,bLastStep,bLog=FALSE,bEner=FALSE;
1702     double           dt,alpha;
1703     real             *imass,*imassin;
1704     rvec             *force;
1705     real             dt_1;
1706     int              start,homenr,nrend,i,j,d,n,m,g;
1707     int              blen0,blen1,iatom,jatom,nshake,nsettle,nconstr,nexpand;
1708     int              *icom = NULL;
1709     tensor           vir_con;
1710     rvec             *vcom,*xcom,*vall,*xall,*xin,*vin,*forcein,*fall,*xpall,*xprimein,*xprime;
1711     int              nth,th;
1712
1713     /* Running the velocity half does nothing except for velocity verlet */
1714     if ((UpdatePart == etrtVELOCITY1 || UpdatePart == etrtVELOCITY2) &&
1715         !EI_VV(inputrec->eI))
1716     {
1717         gmx_incons("update_coords called for velocity without VV integrator");
1718     }
1719
1720     start  = md->start;
1721     homenr = md->homenr;
1722     nrend = start+homenr;
1723
1724     xprime = get_xprime(state,upd);
1725
1726     dt   = inputrec->delta_t;
1727     dt_1 = 1.0/dt;
1728
1729     /* We need to update the NMR restraint history when time averaging is used */
1730     if (state->flags & (1<<estDISRE_RM3TAV))
1731     {
1732         update_disres_history(fcd,&state->hist);
1733     }
1734     if (state->flags & (1<<estORIRE_DTAV))
1735     {
1736         update_orires_history(fcd,&state->hist);
1737     }
1738
1739
1740     bNH = inputrec->etc == etcNOSEHOOVER;
1741     bPR = ((inputrec->epc == epcPARRINELLORAHMAN) || (inputrec->epc == epcMTTK));
1742
1743     if (bDoLR && inputrec->nstlist > 1 && !EI_VV(inputrec->eI))  /* get this working with VV? */
1744     {
1745         /* Store the total force + nstlist-1 times the LR force
1746          * in forces_lr, so it can be used in a normal update algorithm
1747          * to produce twin time stepping.
1748          */
1749         /* is this correct in the new construction? MRS */
1750         combine_forces(inputrec->nstlist,constr,inputrec,md,idef,cr,
1751                        step,state,bMolPBC,
1752                        start,nrend,f,f_lr,nrnb);
1753         force = f_lr;
1754     }
1755     else
1756     {
1757         force = f;
1758     }
1759
1760     /* ############# START The update of velocities and positions ######### */
1761     where();
1762     dump_it_all(fplog,"Before update",
1763                 state->natoms,state->x,xprime,state->v,force);
1764
1765     if (EI_RANDOM(inputrec->eI))
1766     {
1767         /* We still need to take care of generating random seeds properly
1768          * when multi-threading.
1769          */
1770         nth = 1;
1771     }
1772     else
1773     {
1774         nth = gmx_omp_nthreads_get(emntUpdate);
1775     }
1776
1777 # pragma omp parallel for num_threads(nth) schedule(static)
1778     for(th=0; th<nth; th++)
1779     {
1780         int start_th,end_th;
1781
1782         start_th = start + ((nrend-start)* th   )/nth;
1783         end_th   = start + ((nrend-start)*(th+1))/nth;
1784
1785         switch (inputrec->eI) {
1786         case (eiMD):
1787             if (ekind->cosacc.cos_accel == 0)
1788             {
1789                 do_update_md(start_th,end_th,dt,
1790                              ekind->tcstat,state->nosehoover_vxi,
1791                              ekind->bNEMD,ekind->grpstat,inputrec->opts.acc,
1792                              inputrec->opts.nFreeze,
1793                              md->invmass,md->ptype,
1794                              md->cFREEZE,md->cACC,md->cTC,
1795                              state->x,xprime,state->v,force,M,
1796                              bNH,bPR);
1797             } 
1798             else 
1799             {
1800                 do_update_visc(start_th,end_th,dt,
1801                                ekind->tcstat,state->nosehoover_vxi,
1802                                md->invmass,md->ptype,
1803                                md->cTC,state->x,xprime,state->v,force,M,
1804                                state->box,
1805                                ekind->cosacc.cos_accel,
1806                                ekind->cosacc.vcos,
1807                                bNH,bPR);
1808             }
1809             break;
1810         case (eiSD1):
1811             do_update_sd1(upd->sd,start,homenr,dt,
1812                           inputrec->opts.acc,inputrec->opts.nFreeze,
1813                           md->invmass,md->ptype,
1814                           md->cFREEZE,md->cACC,md->cTC,
1815                           state->x,xprime,state->v,force,state->sd_X,
1816                           inputrec->opts.ngtc,inputrec->opts.tau_t,inputrec->opts.ref_t);
1817             break;
1818         case (eiSD2):
1819             /* The SD update is done in 2 parts, because an extra constraint step
1820              * is needed 
1821              */
1822             do_update_sd2(upd->sd,bInitStep,start,homenr,
1823                           inputrec->opts.acc,inputrec->opts.nFreeze,
1824                           md->invmass,md->ptype,
1825                           md->cFREEZE,md->cACC,md->cTC,
1826                           state->x,xprime,state->v,force,state->sd_X,
1827                           inputrec->opts.ngtc,inputrec->opts.tau_t,inputrec->opts.ref_t,
1828                           TRUE);
1829         break;
1830         case (eiBD):
1831             do_update_bd(start,nrend,dt,
1832                          inputrec->opts.nFreeze,md->invmass,md->ptype,
1833                          md->cFREEZE,md->cTC,
1834                          state->x,xprime,state->v,force,
1835                          inputrec->bd_fric,
1836                          inputrec->opts.ngtc,inputrec->opts.tau_t,inputrec->opts.ref_t,
1837                          upd->sd->bd_rf,upd->sd->gaussrand);
1838             break;
1839         case (eiVV):
1840         case (eiVVAK):
1841             alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */
1842             switch (UpdatePart) {
1843             case etrtVELOCITY1:
1844             case etrtVELOCITY2:
1845                 do_update_vv_vel(start_th,end_th,dt,
1846                                  ekind->tcstat,ekind->grpstat,
1847                                  inputrec->opts.acc,inputrec->opts.nFreeze,
1848                                  md->invmass,md->ptype,
1849                                  md->cFREEZE,md->cACC,
1850                                  state->v,force,
1851                                  (bNH || bPR),state->veta,alpha);  
1852                 break;
1853             case etrtPOSITION:
1854                 do_update_vv_pos(start_th,end_th,dt,
1855                                  ekind->tcstat,ekind->grpstat,
1856                                  inputrec->opts.acc,inputrec->opts.nFreeze,
1857                                  md->invmass,md->ptype,md->cFREEZE,
1858                                  state->x,xprime,state->v,force,
1859                                  (bNH || bPR),state->veta,alpha);
1860                 break;
1861             }
1862             break;
1863         default:
1864             gmx_fatal(FARGS,"Don't know how to update coordinates");
1865             break;
1866         }
1867     }
1868
1869 }
1870
1871
1872 void correct_ekin(FILE *log,int start,int end,rvec v[],rvec vcm,real mass[],
1873                   real tmass,tensor ekin)
1874 {
1875   /*
1876    * This is a debugging routine. It should not be called for production code
1877    *
1878    * The kinetic energy should calculated according to:
1879    *   Ekin = 1/2 m (v-vcm)^2
1880    * However the correction is not always applied, since vcm may not be
1881    * known in time and we compute
1882    *   Ekin' = 1/2 m v^2 instead
1883    * This can be corrected afterwards by computing
1884    *   Ekin = Ekin' + 1/2 m ( -2 v vcm + vcm^2)
1885    * or in hsorthand:
1886    *   Ekin = Ekin' - m v vcm + 1/2 m vcm^2
1887    */
1888   int    i,j,k;
1889   real   m,tm;
1890   rvec   hvcm,mv;
1891   tensor dekin;
1892
1893   /* Local particles */
1894   clear_rvec(mv);
1895
1896   /* Processor dependent part. */
1897   tm = 0;
1898   for(i=start; (i<end); i++)
1899   {
1900     m      = mass[i];
1901     tm    += m;
1902     for(j=0; (j<DIM); j++)
1903     {
1904         mv[j] += m*v[i][j];
1905     }
1906   }
1907   /* Shortcut */
1908   svmul(1/tmass,vcm,vcm);
1909   svmul(0.5,vcm,hvcm);
1910   clear_mat(dekin);
1911   for(j=0; (j<DIM); j++)
1912   {
1913       for(k=0; (k<DIM); k++)
1914       {
1915           dekin[j][k] += vcm[k]*(tm*hvcm[j]-mv[j]);
1916       }
1917   }
1918   pr_rvecs(log,0,"dekin",dekin,DIM);
1919   pr_rvecs(log,0," ekin", ekin,DIM);
1920   fprintf(log,"dekin = %g, ekin = %g  vcm = (%8.4f %8.4f %8.4f)\n",
1921           trace(dekin),trace(ekin),vcm[XX],vcm[YY],vcm[ZZ]);
1922   fprintf(log,"mv = (%8.4f %8.4f %8.4f)\n",
1923           mv[XX],mv[YY],mv[ZZ]);
1924 }
1925
1926 extern gmx_bool update_randomize_velocities(t_inputrec *ir, gmx_large_int_t step, t_mdatoms *md, t_state *state, gmx_update_t upd, t_idef *idef, gmx_constr_t constr) {
1927
1928     int i;
1929     real rate = (ir->delta_t)/ir->opts.tau_t[0];
1930     /* proceed with andersen if 1) it's fixed probability per
1931        particle andersen or 2) it's massive andersen and it's tau_t/dt */
1932     if ((ir->etc==etcANDERSEN) || do_per_step(step,(int)(1.0/rate)))
1933     {
1934         srenew(upd->randatom,state->nalloc);
1935         srenew(upd->randatom_list,state->nalloc);
1936         if (upd->randatom_list_init == FALSE) {
1937             for (i=0;i<state->nalloc;i++) {
1938                 upd->randatom[i] = FALSE;
1939                 upd->randatom_list[i] = 0;
1940             }
1941             upd->randatom_list_init = TRUE;
1942         }
1943         andersen_tcoupl(ir,md,state,upd->sd->gaussrand,rate,
1944                         (ir->etc==etcANDERSEN)?idef:NULL,
1945                         constr?get_nblocks(constr):0,
1946                         constr?get_sblock(constr):NULL,
1947                         upd->randatom,upd->randatom_list,
1948                         upd->sd->randomize_group,upd->sd->boltzfac);
1949         return TRUE;
1950     }
1951     return FALSE;
1952 }