src/gromacs/mdlib/update.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40
  41 #include <stdio.h>
  42 #include <math.h>
  43
  44 #include "types/commrec.h"
  45 #include "sysstuff.h"
  46 #include "smalloc.h"
  47 #include "typedefs.h"
  48 #include "nrnb.h"
  49 #include "physics.h"
  50 #include "macros.h"
  51 #include "vec.h"
  52 #include "main.h"
  53 #include "confio.h"
  54 #include "update.h"
  55 #include "gmx_random.h"
  56 #include "futil.h"
  57 #include "mshift.h"
  58 #include "tgroup.h"
  59 #include "force.h"
  60 #include "names.h"
  61 #include "txtdump.h"
  62 #include "mdrun.h"
  63 #include "constr.h"
  64 #include "edsam.h"
  65 #include "pull.h"
  66 #include "disre.h"
  67 #include "orires.h"
  68 #include "gmx_wallcycle.h"
  69 #include "gmx_omp_nthreads.h"
  70 #include "gmx_omp.h"
  71
  72 /*For debugging, start at v(-dt/2) for velolcity verlet -- uncomment next line */
  73 /*#define STARTFROMDT2*/
  74
  75 typedef struct {
  76     double gdt;
  77     double eph;
  78     double emh;
  79     double em;
  80     double b;
  81     double c;
  82     double d;
  83 } gmx_sd_const_t;
  84
  85 typedef struct {
  86     real V;
  87     real X;
  88     real Yv;
  89     real Yx;
  90 } gmx_sd_sigma_t;
  91
  92 typedef struct {
  93     /* The random state for ngaussrand threads.
  94      * Normal thermostats need just 1 random number generator,
  95      * but SD and BD with OpenMP parallelization need 1 for each thread.
  96      */
  97     int             ngaussrand;
  98     gmx_rng_t      *gaussrand;
  99     /* BD stuff */
 100     real           *bd_rf;
 101     /* SD stuff */
 102     gmx_sd_const_t *sdc;
 103     gmx_sd_sigma_t *sdsig;
 104     rvec           *sd_V;
 105     int             sd_V_nalloc;
 106     /* andersen temperature control stuff */
 107     gmx_bool       *randomize_group;
 108     real           *boltzfac;
 109 } gmx_stochd_t;
 110
 111 typedef struct gmx_update
 112 {
 113     gmx_stochd_t *sd;
 114     /* xprime for constraint algorithms */
 115     rvec         *xp;
 116     int           xp_nalloc;
 117
 118     /* variable size arrays for andersen */
 119     gmx_bool *randatom;
 120     int      *randatom_list;
 121     gmx_bool  randatom_list_init;
 122
 123     /* Variables for the deform algorithm */
 124     gmx_large_int_t deformref_step;
 125     matrix          deformref_box;
 126 } t_gmx_update;
 127
 128
 129 static void do_update_md(int start, int nrend, double dt,
 130                          t_grp_tcstat *tcstat,
 131                          double nh_vxi[],
 132                          gmx_bool bNEMD, t_grp_acc *gstat, rvec accel[],
 133                          ivec nFreeze[],
 134                          real invmass[],
 135                          unsigned short ptype[], unsigned short cFREEZE[],
 136                          unsigned short cACC[], unsigned short cTC[],
 137                          rvec x[], rvec xprime[], rvec v[],
 138                          rvec f[], matrix M,
 139                          gmx_bool bNH, gmx_bool bPR)
 140 {
 141     double imass, w_dt;
 142     int    gf = 0, ga = 0, gt = 0;
 143     rvec   vrel;
 144     real   vn, vv, va, vb, vnrel;
 145     real   lg, vxi = 0, u;
 146     int    n, d;
 147
 148     if (bNH || bPR)
 149     {
 150         /* Update with coupling to extended ensembles, used for
 151          * Nose-Hoover and Parrinello-Rahman coupling
 152          * Nose-Hoover uses the reversible leap-frog integrator from
 153          * Holian et al. Phys Rev E 52(3) : 2338, 1995
 154          */
 155         for (n = start; n < nrend; n++)
 156         {
 157             imass = invmass[n];
 158             if (cFREEZE)
 159             {
 160                 gf   = cFREEZE[n];
 161             }
 162             if (cACC)
 163             {
 164                 ga   = cACC[n];
 165             }
 166             if (cTC)
 167             {
 168                 gt   = cTC[n];
 169             }
 170             lg   = tcstat[gt].lambda;
 171             if (bNH)
 172             {
 173                 vxi   = nh_vxi[gt];
 174             }
 175             rvec_sub(v[n], gstat[ga].u, vrel);
 176
 177             for (d = 0; d < DIM; d++)
 178             {
 179                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 180                 {
 181                     vnrel = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
 182                                               - iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
 183                     /* do not scale the mean velocities u */
 184                     vn             = gstat[ga].u[d] + accel[ga][d]*dt + vnrel;
 185                     v[n][d]        = vn;
 186                     xprime[n][d]   = x[n][d]+vn*dt;
 187                 }
 188                 else
 189                 {
 190                     v[n][d]        = 0.0;
 191                     xprime[n][d]   = x[n][d];
 192                 }
 193             }
 194         }
 195     }
 196     else if (cFREEZE != NULL ||
 197              nFreeze[0][XX] || nFreeze[0][YY] || nFreeze[0][ZZ] ||
 198              bNEMD)
 199     {
 200         /* Update with Berendsen/v-rescale coupling and freeze or NEMD */
 201         for (n = start; n < nrend; n++)
 202         {
 203             w_dt = invmass[n]*dt;
 204             if (cFREEZE)
 205             {
 206                 gf   = cFREEZE[n];
 207             }
 208             if (cACC)
 209             {
 210                 ga   = cACC[n];
 211             }
 212             if (cTC)
 213             {
 214                 gt   = cTC[n];
 215             }
 216             lg   = tcstat[gt].lambda;
 217
 218             for (d = 0; d < DIM; d++)
 219             {
 220                 vn             = v[n][d];
 221                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 222                 {
 223                     vv             = lg*vn + f[n][d]*w_dt;
 224
 225                     /* do not scale the mean velocities u */
 226                     u              = gstat[ga].u[d];
 227                     va             = vv + accel[ga][d]*dt;
 228                     vb             = va + (1.0-lg)*u;
 229                     v[n][d]        = vb;
 230                     xprime[n][d]   = x[n][d]+vb*dt;
 231                 }
 232                 else
 233                 {
 234                     v[n][d]        = 0.0;
 235                     xprime[n][d]   = x[n][d];
 236                 }
 237             }
 238         }
 239     }
 240     else
 241     {
 242         /* Plain update with Berendsen/v-rescale coupling */
 243         for (n = start; n < nrend; n++)
 244         {
 245             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
 246             {
 247                 w_dt = invmass[n]*dt;
 248                 if (cTC)
 249                 {
 250                     gt = cTC[n];
 251                 }
 252                 lg = tcstat[gt].lambda;
 253
 254                 for (d = 0; d < DIM; d++)
 255                 {
 256                     vn           = lg*v[n][d] + f[n][d]*w_dt;
 257                     v[n][d]      = vn;
 258                     xprime[n][d] = x[n][d] + vn*dt;
 259                 }
 260             }
 261             else
 262             {
 263                 for (d = 0; d < DIM; d++)
 264                 {
 265                     v[n][d]        = 0.0;
 266                     xprime[n][d]   = x[n][d];
 267                 }
 268             }
 269         }
 270     }
 271 }
 272
 273 static void do_update_vv_vel(int start, int nrend, double dt,
 274                              t_grp_tcstat *tcstat, t_grp_acc *gstat,
 275                              rvec accel[], ivec nFreeze[], real invmass[],
 276                              unsigned short ptype[], unsigned short cFREEZE[],
 277                              unsigned short cACC[], rvec v[], rvec f[],
 278                              gmx_bool bExtended, real veta, real alpha)
 279 {
 280     double imass, w_dt;
 281     int    gf = 0, ga = 0;
 282     rvec   vrel;
 283     real   u, vn, vv, va, vb, vnrel;
 284     int    n, d;
 285     double g, mv1, mv2;
 286
 287     if (bExtended)
 288     {
 289         g        = 0.25*dt*veta*alpha;
 290         mv1      = exp(-g);
 291         mv2      = series_sinhx(g);
 292     }
 293     else
 294     {
 295         mv1      = 1.0;
 296         mv2      = 1.0;
 297     }
 298     for (n = start; n < nrend; n++)
 299     {
 300         w_dt = invmass[n]*dt;
 301         if (cFREEZE)
 302         {
 303             gf   = cFREEZE[n];
 304         }
 305         if (cACC)
 306         {
 307             ga   = cACC[n];
 308         }
 309
 310         for (d = 0; d < DIM; d++)
 311         {
 312             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 313             {
 314                 v[n][d]             = mv1*(mv1*v[n][d] + 0.5*(w_dt*mv2*f[n][d]))+0.5*accel[ga][d]*dt;
 315             }
 316             else
 317             {
 318                 v[n][d]        = 0.0;
 319             }
 320         }
 321     }
 322 } /* do_update_vv_vel */
 323
 324 static void do_update_vv_pos(int start, int nrend, double dt,
 325                              t_grp_tcstat *tcstat, t_grp_acc *gstat,
 326                              rvec accel[], ivec nFreeze[], real invmass[],
 327                              unsigned short ptype[], unsigned short cFREEZE[],
 328                              rvec x[], rvec xprime[], rvec v[],
 329                              rvec f[], gmx_bool bExtended, real veta, real alpha)
 330 {
 331     double imass, w_dt;
 332     int    gf = 0;
 333     int    n, d;
 334     double g, mr1, mr2;
 335
 336     /* Would it make more sense if Parrinello-Rahman was put here? */
 337     if (bExtended)
 338     {
 339         g        = 0.5*dt*veta;
 340         mr1      = exp(g);
 341         mr2      = series_sinhx(g);
 342     }
 343     else
 344     {
 345         mr1      = 1.0;
 346         mr2      = 1.0;
 347     }
 348
 349     for (n = start; n < nrend; n++)
 350     {
 351
 352         if (cFREEZE)
 353         {
 354             gf   = cFREEZE[n];
 355         }
 356
 357         for (d = 0; d < DIM; d++)
 358         {
 359             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 360             {
 361                 xprime[n][d]   = mr1*(mr1*x[n][d]+mr2*dt*v[n][d]);
 362             }
 363             else
 364             {
 365                 xprime[n][d]   = x[n][d];
 366             }
 367         }
 368     }
 369 } /* do_update_vv_pos */
 370
 371 static void do_update_visc(int start, int nrend, double dt,
 372                            t_grp_tcstat *tcstat,
 373                            double nh_vxi[],
 374                            real invmass[],
 375                            unsigned short ptype[], unsigned short cTC[],
 376                            rvec x[], rvec xprime[], rvec v[],
 377                            rvec f[], matrix M, matrix box, real
 378                            cos_accel, real vcos,
 379                            gmx_bool bNH, gmx_bool bPR)
 380 {
 381     double imass, w_dt;
 382     int    gt = 0;
 383     real   vn, vc;
 384     real   lg, vxi = 0, vv;
 385     real   fac, cosz;
 386     rvec   vrel;
 387     int    n, d;
 388
 389     fac = 2*M_PI/(box[ZZ][ZZ]);
 390
 391     if (bNH || bPR)
 392     {
 393         /* Update with coupling to extended ensembles, used for
 394          * Nose-Hoover and Parrinello-Rahman coupling
 395          */
 396         for (n = start; n < nrend; n++)
 397         {
 398             imass = invmass[n];
 399             if (cTC)
 400             {
 401                 gt   = cTC[n];
 402             }
 403             lg   = tcstat[gt].lambda;
 404             cosz = cos(fac*x[n][ZZ]);
 405
 406             copy_rvec(v[n], vrel);
 407
 408             vc            = cosz*vcos;
 409             vrel[XX]     -= vc;
 410             if (bNH)
 411             {
 412                 vxi        = nh_vxi[gt];
 413             }
 414             for (d = 0; d < DIM; d++)
 415             {
 416                 vn             = v[n][d];
 417
 418                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
 419                 {
 420                     vn  = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
 421                                             - iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
 422                     if (d == XX)
 423                     {
 424                         vn += vc + dt*cosz*cos_accel;
 425                     }
 426                     v[n][d]        = vn;
 427                     xprime[n][d]   = x[n][d]+vn*dt;
 428                 }
 429                 else
 430                 {
 431                     xprime[n][d]   = x[n][d];
 432                 }
 433             }
 434         }
 435     }
 436     else
 437     {
 438         /* Classic version of update, used with berendsen coupling */
 439         for (n = start; n < nrend; n++)
 440         {
 441             w_dt = invmass[n]*dt;
 442             if (cTC)
 443             {
 444                 gt   = cTC[n];
 445             }
 446             lg   = tcstat[gt].lambda;
 447             cosz = cos(fac*x[n][ZZ]);
 448
 449             for (d = 0; d < DIM; d++)
 450             {
 451                 vn             = v[n][d];
 452
 453                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
 454                 {
 455                     if (d == XX)
 456                     {
 457                         vc           = cosz*vcos;
 458                         /* Do not scale the cosine velocity profile */
 459                         vv           = vc + lg*(vn - vc + f[n][d]*w_dt);
 460                         /* Add the cosine accelaration profile */
 461                         vv          += dt*cosz*cos_accel;
 462                     }
 463                     else
 464                     {
 465                         vv           = lg*(vn + f[n][d]*w_dt);
 466                     }
 467                     v[n][d]        = vv;
 468                     xprime[n][d]   = x[n][d]+vv*dt;
 469                 }
 470                 else
 471                 {
 472                     v[n][d]        = 0.0;
 473                     xprime[n][d]   = x[n][d];
 474                 }
 475             }
 476         }
 477     }
 478 }
 479
 480 /* Allocates and initializes sd->gaussrand[i] for i=1, i<sd->ngaussrand,
 481  * Using seeds generated from sd->gaussrand[0].
 482  */
 483 static void init_multiple_gaussrand(gmx_stochd_t *sd)
 484 {
 485     int           ngr, i;
 486     unsigned int *seed;
 487
 488     ngr = sd->ngaussrand;
 489     snew(seed, ngr);
 490
 491     for (i = 1; i < ngr; i++)
 492     {
 493         seed[i] = gmx_rng_uniform_uint32(sd->gaussrand[0]);
 494     }
 495
 496     if (ngr != gmx_omp_nthreads_get(emntUpdate))
 497     {
 498         gmx_incons("The number of Gaussian number generators should be equal to gmx_omp_nthreads_get(emntUpdate)");
 499     }
 500
 501 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
 502     {
 503         int th;
 504
 505         th = gmx_omp_get_thread_num();
 506         if (th > 0)
 507         {
 508             /* Initialize on each thread to get memory allocated thread-local */
 509             sd->gaussrand[th] = gmx_rng_init(seed[th]);
 510         }
 511     }
 512
 513     sfree(seed);
 514 }
 515
 516 static gmx_stochd_t *init_stochd(FILE *fplog, t_inputrec *ir, int nthreads)
 517 {
 518     gmx_stochd_t   *sd;
 519     gmx_sd_const_t *sdc;
 520     int             ngtc, n, th;
 521     real            y;
 522
 523     snew(sd, 1);
 524
 525     /* Initiate random number generator for langevin type dynamics,
 526      * for BD, SD or velocity rescaling temperature coupling.
 527      */
 528     if (ir->eI == eiBD || EI_SD(ir->eI))
 529     {
 530         sd->ngaussrand = nthreads;
 531     }
 532     else
 533     {
 534         sd->ngaussrand = 1;
 535     }
 536     snew(sd->gaussrand, sd->ngaussrand);
 537
 538     /* Initialize the first random generator */
 539     sd->gaussrand[0] = gmx_rng_init(ir->ld_seed);
 540
 541     if (sd->ngaussrand > 1)
 542     {
 543         /* Initialize the rest of the random number generators,
 544          * using the first one to generate seeds.
 545          */
 546         init_multiple_gaussrand(sd);
 547     }
 548
 549     ngtc = ir->opts.ngtc;
 550
 551     if (ir->eI == eiBD)
 552     {
 553         snew(sd->bd_rf, ngtc);
 554     }
 555     else if (EI_SD(ir->eI))
 556     {
 557         snew(sd->sdc, ngtc);
 558         snew(sd->sdsig, ngtc);
 559
 560         sdc = sd->sdc;
 561         for (n = 0; n < ngtc; n++)
 562         {
 563             if (ir->opts.tau_t[n] > 0)
 564             {
 565                 sdc[n].gdt = ir->delta_t/ir->opts.tau_t[n];
 566                 sdc[n].eph = exp(sdc[n].gdt/2);
 567                 sdc[n].emh = exp(-sdc[n].gdt/2);
 568                 sdc[n].em  = exp(-sdc[n].gdt);
 569             }
 570             else
 571             {
 572                 /* No friction and noise on this group */
 573                 sdc[n].gdt = 0;
 574                 sdc[n].eph = 1;
 575                 sdc[n].emh = 1;
 576                 sdc[n].em  = 1;
 577             }
 578             if (sdc[n].gdt >= 0.05)
 579             {
 580                 sdc[n].b = sdc[n].gdt*(sdc[n].eph*sdc[n].eph - 1)
 581                     - 4*(sdc[n].eph - 1)*(sdc[n].eph - 1);
 582                 sdc[n].c = sdc[n].gdt - 3 + 4*sdc[n].emh - sdc[n].em;
 583                 sdc[n].d = 2 - sdc[n].eph - sdc[n].emh;
 584             }
 585             else
 586             {
 587                 y = sdc[n].gdt/2;
 588                 /* Seventh order expansions for small y */
 589                 sdc[n].b = y*y*y*y*(1/3.0+y*(1/3.0+y*(17/90.0+y*7/9.0)));
 590                 sdc[n].c = y*y*y*(2/3.0+y*(-1/2.0+y*(7/30.0+y*(-1/12.0+y*31/1260.0))));
 591                 sdc[n].d = y*y*(-1+y*y*(-1/12.0-y*y/360.0));
 592             }
 593             if (debug)
 594             {
 595                 fprintf(debug, "SD const tc-grp %d: b %g  c %g  d %g\n",
 596                         n, sdc[n].b, sdc[n].c, sdc[n].d);
 597             }
 598         }
 599     }
 600     else if (ETC_ANDERSEN(ir->etc))
 601     {
 602         int        ngtc;
 603         t_grpopts *opts;
 604         real       reft;
 605
 606         opts = &ir->opts;
 607         ngtc = opts->ngtc;
 608
 609         snew(sd->randomize_group, ngtc);
 610         snew(sd->boltzfac, ngtc);
 611
 612         /* for now, assume that all groups, if randomized, are randomized at the same rate, i.e. tau_t is the same. */
 613         /* since constraint groups don't necessarily match up with temperature groups! This is checked in readir.c */
 614
 615         for (n = 0; n < ngtc; n++)
 616         {
 617             reft = max(0.0, opts->ref_t[n]);
 618             if ((opts->tau_t[n] > 0) && (reft > 0))  /* tau_t or ref_t = 0 means that no randomization is done */
 619             {
 620                 sd->randomize_group[n] = TRUE;
 621                 sd->boltzfac[n]        = BOLTZ*opts->ref_t[n];
 622             }
 623             else
 624             {
 625                 sd->randomize_group[n] = FALSE;
 626             }
 627         }
 628     }
 629     return sd;
 630 }
 631
 632 void get_stochd_state(gmx_update_t upd, t_state *state)
 633 {
 634     /* Note that we only get the state of the first random generator,
 635      * even if there are multiple. This avoids repetition.
 636      */
 637     gmx_rng_get_state(upd->sd->gaussrand[0], state->ld_rng, state->ld_rngi);
 638 }
 639
 640 void set_stochd_state(gmx_update_t upd, t_state *state)
 641 {
 642     gmx_stochd_t *sd;
 643     int           i;
 644
 645     sd = upd->sd;
 646
 647     gmx_rng_set_state(sd->gaussrand[0], state->ld_rng, state->ld_rngi[0]);
 648
 649     if (sd->ngaussrand > 1)
 650     {
 651         /* We only end up here with SD or BD with OpenMP.
 652          * Destroy and reinitialize the rest of the random number generators,
 653          * using seeds generated from the first one.
 654          * Although this doesn't recover the previous state,
 655          * it at least avoids repetition, which is most important.
 656          * Exaclty restoring states with all MPI+OpenMP setups is difficult
 657          * and as the integrator is random to start with, doesn't gain us much.
 658          */
 659         for (i = 1; i < sd->ngaussrand; i++)
 660         {
 661             gmx_rng_destroy(sd->gaussrand[i]);
 662         }
 663
 664         init_multiple_gaussrand(sd);
 665     }
 666 }
 667
 668 gmx_update_t init_update(FILE *fplog, t_inputrec *ir)
 669 {
 670     t_gmx_update *upd;
 671
 672     snew(upd, 1);
 673
 674     if (ir->eI == eiBD || EI_SD(ir->eI) || ir->etc == etcVRESCALE || ETC_ANDERSEN(ir->etc))
 675     {
 676         upd->sd = init_stochd(fplog, ir, gmx_omp_nthreads_get(emntUpdate));
 677     }
 678
 679     upd->xp                 = NULL;
 680     upd->xp_nalloc          = 0;
 681     upd->randatom           = NULL;
 682     upd->randatom_list      = NULL;
 683     upd->randatom_list_init = FALSE; /* we have not yet cleared the data structure at this point */
 684
 685     return upd;
 686 }
 687
 688 static void do_update_sd1(gmx_stochd_t *sd,
 689                           gmx_rng_t gaussrand,
 690                           int start, int nrend, double dt,
 691                           rvec accel[], ivec nFreeze[],
 692                           real invmass[], unsigned short ptype[],
 693                           unsigned short cFREEZE[], unsigned short cACC[],
 694                           unsigned short cTC[],
 695                           rvec x[], rvec xprime[], rvec v[], rvec f[],
 696                           rvec sd_X[],
 697                           int ngtc, real tau_t[], real ref_t[])
 698 {
 699     gmx_sd_const_t *sdc;
 700     gmx_sd_sigma_t *sig;
 701     real            kT;
 702     int             gf = 0, ga = 0, gt = 0;
 703     real            ism, sd_V;
 704     int             n, d;
 705
 706     sdc = sd->sdc;
 707     sig = sd->sdsig;
 708
 709     for (n = 0; n < ngtc; n++)
 710     {
 711         kT = BOLTZ*ref_t[n];
 712         /* The mass is encounted for later, since this differs per atom */
 713         sig[n].V  = sqrt(kT*(1 - sdc[n].em*sdc[n].em));
 714     }
 715
 716     for (n = start; n < nrend; n++)
 717     {
 718         ism = sqrt(invmass[n]);
 719         if (cFREEZE)
 720         {
 721             gf  = cFREEZE[n];
 722         }
 723         if (cACC)
 724         {
 725             ga  = cACC[n];
 726         }
 727         if (cTC)
 728         {
 729             gt  = cTC[n];
 730         }
 731
 732         for (d = 0; d < DIM; d++)
 733         {
 734             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 735             {
 736                 sd_V = ism*sig[gt].V*gmx_rng_gaussian_table(gaussrand);
 737
 738                 v[n][d] = v[n][d]*sdc[gt].em
 739                     + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em)
 740                     + sd_V;
 741
 742                 xprime[n][d] = x[n][d] + v[n][d]*dt;
 743             }
 744             else
 745             {
 746                 v[n][d]      = 0.0;
 747                 xprime[n][d] = x[n][d];
 748             }
 749         }
 750     }
 751 }
 752
 753 static void check_sd2_work_data_allocation(gmx_stochd_t *sd, int nrend)
 754 {
 755     if (nrend > sd->sd_V_nalloc)
 756     {
 757         sd->sd_V_nalloc = over_alloc_dd(nrend);
 758         srenew(sd->sd_V, sd->sd_V_nalloc);
 759     }
 760 }
 761
 762 static void do_update_sd2_Tconsts(gmx_stochd_t *sd,
 763                                   int ngtc,
 764                                   const real tau_t[],
 765                                   const real ref_t[])
 766 {
 767     /* This is separated from the update below, because it is single threaded */
 768     gmx_sd_const_t *sdc;
 769     gmx_sd_sigma_t *sig;
 770     int             gt;
 771     real            kT;
 772
 773     sdc = sd->sdc;
 774     sig = sd->sdsig;
 775
 776     for (gt = 0; gt < ngtc; gt++)
 777     {
 778         kT = BOLTZ*ref_t[gt];
 779         /* The mass is encounted for later, since this differs per atom */
 780         sig[gt].V  = sqrt(kT*(1-sdc[gt].em));
 781         sig[gt].X  = sqrt(kT*sqr(tau_t[gt])*sdc[gt].c);
 782         sig[gt].Yv = sqrt(kT*sdc[gt].b/sdc[gt].c);
 783         sig[gt].Yx = sqrt(kT*sqr(tau_t[gt])*sdc[gt].b/(1-sdc[gt].em));
 784     }
 785 }
 786
 787 static void do_update_sd2(gmx_stochd_t *sd,
 788                           gmx_rng_t gaussrand,
 789                           gmx_bool bInitStep,
 790                           int start, int nrend,
 791                           rvec accel[], ivec nFreeze[],
 792                           real invmass[], unsigned short ptype[],
 793                           unsigned short cFREEZE[], unsigned short cACC[],
 794                           unsigned short cTC[],
 795                           rvec x[], rvec xprime[], rvec v[], rvec f[],
 796                           rvec sd_X[],
 797                           const real tau_t[],
 798                           gmx_bool bFirstHalf)
 799 {
 800     gmx_sd_const_t *sdc;
 801     gmx_sd_sigma_t *sig;
 802     /* The random part of the velocity update, generated in the first
 803      * half of the update, needs to be remembered for the second half.
 804      */
 805     rvec  *sd_V;
 806     real   kT;
 807     int    gf = 0, ga = 0, gt = 0;
 808     real   vn = 0, Vmh, Xmh;
 809     real   ism;
 810     int    n, d;
 811
 812     sdc  = sd->sdc;
 813     sig  = sd->sdsig;
 814     sd_V = sd->sd_V;
 815
 816     for (n = start; n < nrend; n++)
 817     {
 818         ism = sqrt(invmass[n]);
 819         if (cFREEZE)
 820         {
 821             gf  = cFREEZE[n];
 822         }
 823         if (cACC)
 824         {
 825             ga  = cACC[n];
 826         }
 827         if (cTC)
 828         {
 829             gt  = cTC[n];
 830         }
 831
 832         for (d = 0; d < DIM; d++)
 833         {
 834             if (bFirstHalf)
 835             {
 836                 vn             = v[n][d];
 837             }
 838             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 839             {
 840                 if (bFirstHalf)
 841                 {
 842                     if (bInitStep)
 843                     {
 844                         sd_X[n][d] = ism*sig[gt].X*gmx_rng_gaussian_table(gaussrand);
 845                     }
 846                     Vmh = sd_X[n][d]*sdc[gt].d/(tau_t[gt]*sdc[gt].c)
 847                         + ism*sig[gt].Yv*gmx_rng_gaussian_table(gaussrand);
 848                     sd_V[n][d] = ism*sig[gt].V*gmx_rng_gaussian_table(gaussrand);
 849
 850                     v[n][d] = vn*sdc[gt].em
 851                         + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em)
 852                         + sd_V[n][d] - sdc[gt].em*Vmh;
 853
 854                     xprime[n][d] = x[n][d] + v[n][d]*tau_t[gt]*(sdc[gt].eph - sdc[gt].emh);
 855                 }
 856                 else
 857                 {
 858
 859                     /* Correct the velocities for the constraints.
 860                      * This operation introduces some inaccuracy,
 861                      * since the velocity is determined from differences in coordinates.
 862                      */
 863                     v[n][d] =
 864                         (xprime[n][d] - x[n][d])/(tau_t[gt]*(sdc[gt].eph - sdc[gt].emh));
 865
 866                     Xmh = sd_V[n][d]*tau_t[gt]*sdc[gt].d/(sdc[gt].em-1)
 867                         + ism*sig[gt].Yx*gmx_rng_gaussian_table(gaussrand);
 868                     sd_X[n][d] = ism*sig[gt].X*gmx_rng_gaussian_table(gaussrand);
 869
 870                     xprime[n][d] += sd_X[n][d] - Xmh;
 871
 872                 }
 873             }
 874             else
 875             {
 876                 if (bFirstHalf)
 877                 {
 878                     v[n][d]        = 0.0;
 879                     xprime[n][d]   = x[n][d];
 880                 }
 881             }
 882         }
 883     }
 884 }
 885
 886 static void do_update_bd_Tconsts(double dt, real friction_coefficient,
 887                                  int ngtc, const real ref_t[],
 888                                  real *rf)
 889 {
 890     /* This is separated from the update below, because it is single threaded */
 891     int gt;
 892
 893     if (friction_coefficient != 0)
 894     {
 895         for (gt = 0; gt < ngtc; gt++)
 896         {
 897             rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]/(friction_coefficient*dt));
 898         }
 899     }
 900     else
 901     {
 902         for (gt = 0; gt < ngtc; gt++)
 903         {
 904             rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]);
 905         }
 906     }
 907 }
 908
 909 static void do_update_bd(int start, int nrend, double dt,
 910                          ivec nFreeze[],
 911                          real invmass[], unsigned short ptype[],
 912                          unsigned short cFREEZE[], unsigned short cTC[],
 913                          rvec x[], rvec xprime[], rvec v[],
 914                          rvec f[], real friction_coefficient,
 915                          real *rf, gmx_rng_t gaussrand)
 916 {
 917     /* note -- these appear to be full step velocities . . .  */
 918     int    gf = 0, gt = 0;
 919     real   vn;
 920     real   invfr = 0;
 921     int    n, d;
 922
 923     if (friction_coefficient != 0)
 924     {
 925         invfr = 1.0/friction_coefficient;
 926     }
 927
 928     for (n = start; (n < nrend); n++)
 929     {
 930         if (cFREEZE)
 931         {
 932             gf = cFREEZE[n];
 933         }
 934         if (cTC)
 935         {
 936             gt = cTC[n];
 937         }
 938         for (d = 0; (d < DIM); d++)
 939         {
 940             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 941             {
 942                 if (friction_coefficient != 0)
 943                 {
 944                     vn = invfr*f[n][d] + rf[gt]*gmx_rng_gaussian_table(gaussrand);
 945                 }
 946                 else
 947                 {
 948                     /* NOTE: invmass = 2/(mass*friction_constant*dt) */
 949                     vn = 0.5*invmass[n]*f[n][d]*dt
 950                         + sqrt(0.5*invmass[n])*rf[gt]*gmx_rng_gaussian_table(gaussrand);
 951                 }
 952
 953                 v[n][d]      = vn;
 954                 xprime[n][d] = x[n][d]+vn*dt;
 955             }
 956             else
 957             {
 958                 v[n][d]      = 0.0;
 959                 xprime[n][d] = x[n][d];
 960             }
 961         }
 962     }
 963 }
 964
 965 static void dump_it_all(FILE *fp, const char *title,
 966                         int natoms, rvec x[], rvec xp[], rvec v[], rvec f[])
 967 {
 968 #ifdef DEBUG
 969     if (fp)
 970     {
 971         fprintf(fp, "%s\n", title);
 972         pr_rvecs(fp, 0, "x", x, natoms);
 973         pr_rvecs(fp, 0, "xp", xp, natoms);
 974         pr_rvecs(fp, 0, "v", v, natoms);
 975         pr_rvecs(fp, 0, "f", f, natoms);
 976     }
 977 #endif
 978 }
 979
 980 static void calc_ke_part_normal(rvec v[], t_grpopts *opts, t_mdatoms *md,
 981                                 gmx_ekindata_t *ekind, t_nrnb *nrnb, gmx_bool bEkinAveVel,
 982                                 gmx_bool bSaveEkinOld)
 983 {
 984     int           g;
 985     t_grp_tcstat *tcstat  = ekind->tcstat;
 986     t_grp_acc    *grpstat = ekind->grpstat;
 987     int           nthread, thread;
 988
 989     /* three main: VV with AveVel, vv with AveEkin, leap with AveEkin.  Leap with AveVel is also
 990        an option, but not supported now.  Additionally, if we are doing iterations.
 991        bEkinAveVel: If TRUE, we sum into ekin, if FALSE, into ekinh.
 992        bSavEkinOld: If TRUE (in the case of iteration = bIterate is TRUE), we don't copy over the ekinh_old.
 993        If FALSE, we overrwrite it.
 994      */
 995
 996     /* group velocities are calculated in update_ekindata and
 997      * accumulated in acumulate_groups.
 998      * Now the partial global and groups ekin.
 999      */
1000     for (g = 0; (g < opts->ngtc); g++)
1001     {
1002
1003         if (!bSaveEkinOld)
1004         {
1005             copy_mat(tcstat[g].ekinh, tcstat[g].ekinh_old);
1006         }
1007         if (bEkinAveVel)
1008         {
1009             clear_mat(tcstat[g].ekinf);
1010         }
1011         else
1012         {
1013             clear_mat(tcstat[g].ekinh);
1014         }
1015         if (bEkinAveVel)
1016         {
1017             tcstat[g].ekinscalef_nhc = 1.0; /* need to clear this -- logic is complicated! */
1018         }
1019     }
1020     ekind->dekindl_old = ekind->dekindl;
1021
1022     nthread = gmx_omp_nthreads_get(emntUpdate);
1023
1024 #pragma omp parallel for num_threads(nthread) schedule(static)
1025     for (thread = 0; thread < nthread; thread++)
1026     {
1027         int     start_t, end_t, n;
1028         int     ga, gt;
1029         rvec    v_corrt;
1030         real    hm;
1031         int     d, m;
1032         matrix *ekin_sum;
1033         real   *dekindl_sum;
1034
1035         start_t = md->start + ((thread+0)*md->homenr)/nthread;
1036         end_t   = md->start + ((thread+1)*md->homenr)/nthread;
1037
1038         ekin_sum    = ekind->ekin_work[thread];
1039         dekindl_sum = ekind->dekindl_work[thread];
1040
1041         for (gt = 0; gt < opts->ngtc; gt++)
1042         {
1043             clear_mat(ekin_sum[gt]);
1044         }
1045         *dekindl_sum = 0.0;
1046
1047         ga = 0;
1048         gt = 0;
1049         for (n = start_t; n < end_t; n++)
1050         {
1051             if (md->cACC)
1052             {
1053                 ga = md->cACC[n];
1054             }
1055             if (md->cTC)
1056             {
1057                 gt = md->cTC[n];
1058             }
1059             hm   = 0.5*md->massT[n];
1060
1061             for (d = 0; (d < DIM); d++)
1062             {
1063                 v_corrt[d]  = v[n][d]  - grpstat[ga].u[d];
1064             }
1065             for (d = 0; (d < DIM); d++)
1066             {
1067                 for (m = 0; (m < DIM); m++)
1068                 {
1069                     /* if we're computing a full step velocity, v_corrt[d] has v(t).  Otherwise, v(t+dt/2) */
1070                     ekin_sum[gt][m][d] += hm*v_corrt[m]*v_corrt[d];
1071                 }
1072             }
1073             if (md->nMassPerturbed && md->bPerturbed[n])
1074             {
1075                 *dekindl_sum +=
1076                     0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt, v_corrt);
1077             }
1078         }
1079     }
1080
1081     ekind->dekindl = 0;
1082     for (thread = 0; thread < nthread; thread++)
1083     {
1084         for (g = 0; g < opts->ngtc; g++)
1085         {
1086             if (bEkinAveVel)
1087             {
1088                 m_add(tcstat[g].ekinf, ekind->ekin_work[thread][g],
1089                       tcstat[g].ekinf);
1090             }
1091             else
1092             {
1093                 m_add(tcstat[g].ekinh, ekind->ekin_work[thread][g],
1094                       tcstat[g].ekinh);
1095             }
1096         }
1097
1098         ekind->dekindl += *ekind->dekindl_work[thread];
1099     }
1100
1101     inc_nrnb(nrnb, eNR_EKIN, md->homenr);
1102 }
1103
1104 static void calc_ke_part_visc(matrix box, rvec x[], rvec v[],
1105                               t_grpopts *opts, t_mdatoms *md,
1106                               gmx_ekindata_t *ekind,
1107                               t_nrnb *nrnb, gmx_bool bEkinAveVel, gmx_bool bSaveEkinOld)
1108 {
1109     int           start = md->start, homenr = md->homenr;
1110     int           g, d, n, m, gt = 0;
1111     rvec          v_corrt;
1112     real          hm;
1113     t_grp_tcstat *tcstat = ekind->tcstat;
1114     t_cos_acc    *cosacc = &(ekind->cosacc);
1115     real          dekindl;
1116     real          fac, cosz;
1117     double        mvcos;
1118
1119     for (g = 0; g < opts->ngtc; g++)
1120     {
1121         copy_mat(ekind->tcstat[g].ekinh, ekind->tcstat[g].ekinh_old);
1122         clear_mat(ekind->tcstat[g].ekinh);
1123     }
1124     ekind->dekindl_old = ekind->dekindl;
1125
1126     fac     = 2*M_PI/box[ZZ][ZZ];
1127     mvcos   = 0;
1128     dekindl = 0;
1129     for (n = start; n < start+homenr; n++)
1130     {
1131         if (md->cTC)
1132         {
1133             gt = md->cTC[n];
1134         }
1135         hm   = 0.5*md->massT[n];
1136
1137         /* Note that the times of x and v differ by half a step */
1138         /* MRS -- would have to be changed for VV */
1139         cosz         = cos(fac*x[n][ZZ]);
1140         /* Calculate the amplitude of the new velocity profile */
1141         mvcos       += 2*cosz*md->massT[n]*v[n][XX];
1142
1143         copy_rvec(v[n], v_corrt);
1144         /* Subtract the profile for the kinetic energy */
1145         v_corrt[XX] -= cosz*cosacc->vcos;
1146         for (d = 0; (d < DIM); d++)
1147         {
1148             for (m = 0; (m < DIM); m++)
1149             {
1150                 /* if we're computing a full step velocity, v_corrt[d] has v(t).  Otherwise, v(t+dt/2) */
1151                 if (bEkinAveVel)
1152                 {
1153                     tcstat[gt].ekinf[m][d] += hm*v_corrt[m]*v_corrt[d];
1154                 }
1155                 else
1156                 {
1157                     tcstat[gt].ekinh[m][d] += hm*v_corrt[m]*v_corrt[d];
1158                 }
1159             }
1160         }
1161         if (md->nPerturbed && md->bPerturbed[n])
1162         {
1163             dekindl += 0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt, v_corrt);
1164         }
1165     }
1166     ekind->dekindl = dekindl;
1167     cosacc->mvcos  = mvcos;
1168
1169     inc_nrnb(nrnb, eNR_EKIN, homenr);
1170 }
1171
1172 void calc_ke_part(t_state *state, t_grpopts *opts, t_mdatoms *md,
1173                   gmx_ekindata_t *ekind, t_nrnb *nrnb, gmx_bool bEkinAveVel, gmx_bool bSaveEkinOld)
1174 {
1175     if (ekind->cosacc.cos_accel == 0)
1176     {
1177         calc_ke_part_normal(state->v, opts, md, ekind, nrnb, bEkinAveVel, bSaveEkinOld);
1178     }
1179     else
1180     {
1181         calc_ke_part_visc(state->box, state->x, state->v, opts, md, ekind, nrnb, bEkinAveVel, bSaveEkinOld);
1182     }
1183 }
1184
1185 extern void init_ekinstate(ekinstate_t *ekinstate, const t_inputrec *ir)
1186 {
1187     ekinstate->ekin_n = ir->opts.ngtc;
1188     snew(ekinstate->ekinh, ekinstate->ekin_n);
1189     snew(ekinstate->ekinf, ekinstate->ekin_n);
1190     snew(ekinstate->ekinh_old, ekinstate->ekin_n);
1191     snew(ekinstate->ekinscalef_nhc, ekinstate->ekin_n);
1192     snew(ekinstate->ekinscaleh_nhc, ekinstate->ekin_n);
1193     snew(ekinstate->vscale_nhc, ekinstate->ekin_n);
1194     ekinstate->dekindl = 0;
1195     ekinstate->mvcos   = 0;
1196 }
1197
1198 void update_ekinstate(ekinstate_t *ekinstate, gmx_ekindata_t *ekind)
1199 {
1200     int i;
1201
1202     for (i = 0; i < ekinstate->ekin_n; i++)
1203     {
1204         copy_mat(ekind->tcstat[i].ekinh, ekinstate->ekinh[i]);
1205         copy_mat(ekind->tcstat[i].ekinf, ekinstate->ekinf[i]);
1206         copy_mat(ekind->tcstat[i].ekinh_old, ekinstate->ekinh_old[i]);
1207         ekinstate->ekinscalef_nhc[i] = ekind->tcstat[i].ekinscalef_nhc;
1208         ekinstate->ekinscaleh_nhc[i] = ekind->tcstat[i].ekinscaleh_nhc;
1209         ekinstate->vscale_nhc[i]     = ekind->tcstat[i].vscale_nhc;
1210     }
1211
1212     copy_mat(ekind->ekin, ekinstate->ekin_total);
1213     ekinstate->dekindl = ekind->dekindl;
1214     ekinstate->mvcos   = ekind->cosacc.mvcos;
1215
1216 }
1217
1218 void restore_ekinstate_from_state(t_commrec *cr,
1219                                   gmx_ekindata_t *ekind, ekinstate_t *ekinstate)
1220 {
1221     int i, n;
1222
1223     if (MASTER(cr))
1224     {
1225         for (i = 0; i < ekinstate->ekin_n; i++)
1226         {
1227             copy_mat(ekinstate->ekinh[i], ekind->tcstat[i].ekinh);
1228             copy_mat(ekinstate->ekinf[i], ekind->tcstat[i].ekinf);
1229             copy_mat(ekinstate->ekinh_old[i], ekind->tcstat[i].ekinh_old);
1230             ekind->tcstat[i].ekinscalef_nhc = ekinstate->ekinscalef_nhc[i];
1231             ekind->tcstat[i].ekinscaleh_nhc = ekinstate->ekinscaleh_nhc[i];
1232             ekind->tcstat[i].vscale_nhc     = ekinstate->vscale_nhc[i];
1233         }
1234
1235         copy_mat(ekinstate->ekin_total, ekind->ekin);
1236
1237         ekind->dekindl      = ekinstate->dekindl;
1238         ekind->cosacc.mvcos = ekinstate->mvcos;
1239         n                   = ekinstate->ekin_n;
1240     }
1241
1242     if (PAR(cr))
1243     {
1244         gmx_bcast(sizeof(n), &n, cr);
1245         for (i = 0; i < n; i++)
1246         {
1247             gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinh[0][0]),
1248                       ekind->tcstat[i].ekinh[0], cr);
1249             gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinf[0][0]),
1250                       ekind->tcstat[i].ekinf[0], cr);
1251             gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinh_old[0][0]),
1252                       ekind->tcstat[i].ekinh_old[0], cr);
1253
1254             gmx_bcast(sizeof(ekind->tcstat[i].ekinscalef_nhc),
1255                       &(ekind->tcstat[i].ekinscalef_nhc), cr);
1256             gmx_bcast(sizeof(ekind->tcstat[i].ekinscaleh_nhc),
1257                       &(ekind->tcstat[i].ekinscaleh_nhc), cr);
1258             gmx_bcast(sizeof(ekind->tcstat[i].vscale_nhc),
1259                       &(ekind->tcstat[i].vscale_nhc), cr);
1260         }
1261         gmx_bcast(DIM*DIM*sizeof(ekind->ekin[0][0]),
1262                   ekind->ekin[0], cr);
1263
1264         gmx_bcast(sizeof(ekind->dekindl), &ekind->dekindl, cr);
1265         gmx_bcast(sizeof(ekind->cosacc.mvcos), &ekind->cosacc.mvcos, cr);
1266     }
1267 }
1268
1269 void set_deform_reference_box(gmx_update_t upd, gmx_large_int_t step, matrix box)
1270 {
1271     upd->deformref_step = step;
1272     copy_mat(box, upd->deformref_box);
1273 }
1274
1275 static void deform(gmx_update_t upd,
1276                    int start, int homenr, rvec x[], matrix box, matrix *scale_tot,
1277                    const t_inputrec *ir, gmx_large_int_t step)
1278 {
1279     matrix bnew, invbox, mu;
1280     real   elapsed_time;
1281     int    i, j;
1282
1283     elapsed_time = (step + 1 - upd->deformref_step)*ir->delta_t;
1284     copy_mat(box, bnew);
1285     for (i = 0; i < DIM; i++)
1286     {
1287         for (j = 0; j < DIM; j++)
1288         {
1289             if (ir->deform[i][j] != 0)
1290             {
1291                 bnew[i][j] =
1292                     upd->deformref_box[i][j] + elapsed_time*ir->deform[i][j];
1293             }
1294         }
1295     }
1296     /* We correct the off-diagonal elements,
1297      * which can grow indefinitely during shearing,
1298      * so the shifts do not get messed up.
1299      */
1300     for (i = 1; i < DIM; i++)
1301     {
1302         for (j = i-1; j >= 0; j--)
1303         {
1304             while (bnew[i][j] - box[i][j] > 0.5*bnew[j][j])
1305             {
1306                 rvec_dec(bnew[i], bnew[j]);
1307             }
1308             while (bnew[i][j] - box[i][j] < -0.5*bnew[j][j])
1309             {
1310                 rvec_inc(bnew[i], bnew[j]);
1311             }
1312         }
1313     }
1314     m_inv_ur0(box, invbox);
1315     copy_mat(bnew, box);
1316     mmul_ur0(box, invbox, mu);
1317
1318     for (i = start; i < start+homenr; i++)
1319     {
1320         x[i][XX] = mu[XX][XX]*x[i][XX]+mu[YY][XX]*x[i][YY]+mu[ZZ][XX]*x[i][ZZ];
1321         x[i][YY] = mu[YY][YY]*x[i][YY]+mu[ZZ][YY]*x[i][ZZ];
1322         x[i][ZZ] = mu[ZZ][ZZ]*x[i][ZZ];
1323     }
1324     if (*scale_tot)
1325     {
1326         /* The transposes of the scaling matrices are stored,
1327          * so we need to do matrix multiplication in the inverse order.
1328          */
1329         mmul_ur0(*scale_tot, mu, *scale_tot);
1330     }
1331 }
1332
1333 static void combine_forces(int nstcalclr,
1334                            gmx_constr_t constr,
1335                            t_inputrec *ir, t_mdatoms *md, t_idef *idef,
1336                            t_commrec *cr,
1337                            gmx_large_int_t step,
1338                            t_state *state, gmx_bool bMolPBC,
1339                            int start, int nrend,
1340                            rvec f[], rvec f_lr[],
1341                            t_nrnb *nrnb)
1342 {
1343     int  i, d, nm1;
1344
1345     /* f contains the short-range forces + the long range forces
1346      * which are stored separately in f_lr.
1347      */
1348
1349     if (constr != NULL && !(ir->eConstrAlg == econtSHAKE && ir->epc == epcNO))
1350     {
1351         /* We need to constrain the LR forces separately,
1352          * because due to the different pre-factor for the SR and LR
1353          * forces in the update algorithm, we can not determine
1354          * the constraint force for the coordinate constraining.
1355          * Constrain only the additional LR part of the force.
1356          */
1357         /* MRS -- need to make sure this works with trotter integration -- the constraint calls may not be right.*/
1358         constrain(NULL, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md,
1359                   state->x, f_lr, f_lr, bMolPBC, state->box, state->lambda[efptBONDED], NULL,
1360                   NULL, NULL, nrnb, econqForce, ir->epc == epcMTTK, state->veta, state->veta);
1361     }
1362
1363     /* Add nstcalclr-1 times the LR force to the sum of both forces
1364      * and store the result in forces_lr.
1365      */
1366     nm1 = nstcalclr - 1;
1367     for (i = start; i < nrend; i++)
1368     {
1369         for (d = 0; d < DIM; d++)
1370         {
1371             f_lr[i][d] = f[i][d] + nm1*f_lr[i][d];
1372         }
1373     }
1374 }
1375
1376 void update_tcouple(FILE             *fplog,
1377                     gmx_large_int_t   step,
1378                     t_inputrec       *inputrec,
1379                     t_state          *state,
1380                     gmx_ekindata_t   *ekind,
1381                     gmx_wallcycle_t   wcycle,
1382                     gmx_update_t      upd,
1383                     t_extmass        *MassQ,
1384                     t_mdatoms        *md)
1385
1386 {
1387     gmx_bool   bTCouple = FALSE;
1388     real       dttc;
1389     int        i, start, end, homenr, offset;
1390
1391     /* if using vv with trotter decomposition methods, we do this elsewhere in the code */
1392     if (inputrec->etc != etcNO &&
1393         !(IR_NVT_TROTTER(inputrec) || IR_NPT_TROTTER(inputrec) || IR_NPH_TROTTER(inputrec)))
1394     {
1395         /* We should only couple after a step where energies were determined (for leapfrog versions)
1396            or the step energies are determined, for velocity verlet versions */
1397
1398         if (EI_VV(inputrec->eI))
1399         {
1400             offset = 0;
1401         }
1402         else
1403         {
1404             offset = 1;
1405         }
1406         bTCouple = (inputrec->nsttcouple == 1 ||
1407                     do_per_step(step+inputrec->nsttcouple-offset,
1408                                 inputrec->nsttcouple));
1409     }
1410
1411     if (bTCouple)
1412     {
1413         dttc = inputrec->nsttcouple*inputrec->delta_t;
1414
1415         switch (inputrec->etc)
1416         {
1417             case etcNO:
1418                 break;
1419             case etcBERENDSEN:
1420                 berendsen_tcoupl(inputrec, ekind, dttc);
1421                 break;
1422             case etcNOSEHOOVER:
1423                 nosehoover_tcoupl(&(inputrec->opts), ekind, dttc,
1424                                   state->nosehoover_xi, state->nosehoover_vxi, MassQ);
1425                 break;
1426             case etcVRESCALE:
1427                 vrescale_tcoupl(inputrec, ekind, dttc,
1428                                 state->therm_integral, upd->sd->gaussrand[0]);
1429                 break;
1430         }
1431         /* rescale in place here */
1432         if (EI_VV(inputrec->eI))
1433         {
1434             rescale_velocities(ekind, md, md->start, md->start+md->homenr, state->v);
1435         }
1436     }
1437     else
1438     {
1439         /* Set the T scaling lambda to 1 to have no scaling */
1440         for (i = 0; (i < inputrec->opts.ngtc); i++)
1441         {
1442             ekind->tcstat[i].lambda = 1.0;
1443         }
1444     }
1445 }
1446
1447 void update_pcouple(FILE             *fplog,
1448                     gmx_large_int_t   step,
1449                     t_inputrec       *inputrec,
1450                     t_state          *state,
1451                     matrix            pcoupl_mu,
1452                     matrix            M,
1453                     gmx_wallcycle_t   wcycle,
1454                     gmx_update_t      upd,
1455                     gmx_bool          bInitStep)
1456 {
1457     gmx_bool   bPCouple = FALSE;
1458     real       dtpc     = 0;
1459     int        i;
1460
1461     /* if using Trotter pressure, we do this in coupling.c, so we leave it false. */
1462     if (inputrec->epc != epcNO && (!(IR_NPT_TROTTER(inputrec) || IR_NPH_TROTTER(inputrec))))
1463     {
1464         /* We should only couple after a step where energies were determined */
1465         bPCouple = (inputrec->nstpcouple == 1 ||
1466                     do_per_step(step+inputrec->nstpcouple-1,
1467                                 inputrec->nstpcouple));
1468     }
1469
1470     clear_mat(pcoupl_mu);
1471     for (i = 0; i < DIM; i++)
1472     {
1473         pcoupl_mu[i][i] = 1.0;
1474     }
1475
1476     clear_mat(M);
1477
1478     if (bPCouple)
1479     {
1480         dtpc = inputrec->nstpcouple*inputrec->delta_t;
1481
1482         switch (inputrec->epc)
1483         {
1484             /* We can always pcoupl, even if we did not sum the energies
1485              * the previous step, since state->pres_prev is only updated
1486              * when the energies have been summed.
1487              */
1488             case (epcNO):
1489                 break;
1490             case (epcBERENDSEN):
1491                 if (!bInitStep)
1492                 {
1493                     berendsen_pcoupl(fplog, step, inputrec, dtpc, state->pres_prev, state->box,
1494                                      pcoupl_mu);
1495                 }
1496                 break;
1497             case (epcPARRINELLORAHMAN):
1498                 parrinellorahman_pcoupl(fplog, step, inputrec, dtpc, state->pres_prev,
1499                                         state->box, state->box_rel, state->boxv,
1500                                         M, pcoupl_mu, bInitStep);
1501                 break;
1502             default:
1503                 break;
1504         }
1505     }
1506 }
1507
1508 static rvec *get_xprime(const t_state *state, gmx_update_t upd)
1509 {
1510     if (state->nalloc > upd->xp_nalloc)
1511     {
1512         upd->xp_nalloc = state->nalloc;
1513         srenew(upd->xp, upd->xp_nalloc);
1514     }
1515
1516     return upd->xp;
1517 }
1518
1519 void update_constraints(FILE             *fplog,
1520                         gmx_large_int_t   step,
1521                         real             *dvdlambda, /* the contribution to be added to the bonded interactions */
1522                         t_inputrec       *inputrec,  /* input record and box stuff      */
1523                         gmx_ekindata_t   *ekind,
1524                         t_mdatoms        *md,
1525                         t_state          *state,
1526                         gmx_bool          bMolPBC,
1527                         t_graph          *graph,
1528                         rvec              force[],   /* forces on home particles */
1529                         t_idef           *idef,
1530                         tensor            vir_part,
1531                         tensor            vir,       /* tensors for virial and ekin, needed for computing */
1532                         t_commrec        *cr,
1533                         t_nrnb           *nrnb,
1534                         gmx_wallcycle_t   wcycle,
1535                         gmx_update_t      upd,
1536                         gmx_constr_t      constr,
1537                         gmx_bool          bInitStep,
1538                         gmx_bool          bFirstHalf,
1539                         gmx_bool          bCalcVir,
1540                         real              vetanew)
1541 {
1542     gmx_bool             bExtended, bLastStep, bLog = FALSE, bEner = FALSE, bDoConstr = FALSE;
1543     double               dt;
1544     real                 dt_1;
1545     int                  start, homenr, nrend, i, n, m, g, d;
1546     tensor               vir_con;
1547     rvec                *vbuf, *xprime = NULL;
1548     int                  nth, th;
1549
1550     if (constr)
1551     {
1552         bDoConstr = TRUE;
1553     }
1554     if (bFirstHalf && !EI_VV(inputrec->eI))
1555     {
1556         bDoConstr = FALSE;
1557     }
1558
1559     /* for now, SD update is here -- though it really seems like it
1560        should be reformulated as a velocity verlet method, since it has two parts */
1561
1562     start  = md->start;
1563     homenr = md->homenr;
1564     nrend  = start+homenr;
1565
1566     dt   = inputrec->delta_t;
1567     dt_1 = 1.0/dt;
1568
1569     /*
1570      *  Steps (7C, 8C)
1571      *  APPLY CONSTRAINTS:
1572      *  BLOCK SHAKE
1573
1574      * When doing PR pressure coupling we have to constrain the
1575      * bonds in each iteration. If we are only using Nose-Hoover tcoupling
1576      * it is enough to do this once though, since the relative velocities
1577      * after this will be normal to the bond vector
1578      */
1579
1580     if (bDoConstr)
1581     {
1582         /* clear out constraints before applying */
1583         clear_mat(vir_part);
1584
1585         xprime = get_xprime(state, upd);
1586
1587         bLastStep = (step == inputrec->init_step+inputrec->nsteps);
1588         bLog      = (do_per_step(step, inputrec->nstlog) || bLastStep || (step < 0));
1589         bEner     = (do_per_step(step, inputrec->nstenergy) || bLastStep);
1590         /* Constrain the coordinates xprime */
1591         wallcycle_start(wcycle, ewcCONSTR);
1592         if (EI_VV(inputrec->eI) && bFirstHalf)
1593         {
1594             constrain(NULL, bLog, bEner, constr, idef,
1595                       inputrec, ekind, cr, step, 1, md,
1596                       state->x, state->v, state->v,
1597                       bMolPBC, state->box,
1598                       state->lambda[efptBONDED], dvdlambda,
1599                       NULL, bCalcVir ? &vir_con : NULL, nrnb, econqVeloc,
1600                       inputrec->epc == epcMTTK, state->veta, vetanew);
1601         }
1602         else
1603         {
1604             constrain(NULL, bLog, bEner, constr, idef,
1605                       inputrec, ekind, cr, step, 1, md,
1606                       state->x, xprime, NULL,
1607                       bMolPBC, state->box,
1608                       state->lambda[efptBONDED], dvdlambda,
1609                       state->v, bCalcVir ? &vir_con : NULL, nrnb, econqCoord,
1610                       inputrec->epc == epcMTTK, state->veta, state->veta);
1611         }
1612         wallcycle_stop(wcycle, ewcCONSTR);
1613
1614         where();
1615
1616         dump_it_all(fplog, "After Shake",
1617                     state->natoms, state->x, xprime, state->v, force);
1618
1619         if (bCalcVir)
1620         {
1621             if (inputrec->eI == eiSD2)
1622             {
1623                 /* A correction factor eph is needed for the SD constraint force */
1624                 /* Here we can, unfortunately, not have proper corrections
1625                  * for different friction constants, so we use the first one.
1626                  */
1627                 for (i = 0; i < DIM; i++)
1628                 {
1629                     for (m = 0; m < DIM; m++)
1630                     {
1631                         vir_part[i][m] += upd->sd->sdc[0].eph*vir_con[i][m];
1632                     }
1633                 }
1634             }
1635             else
1636             {
1637                 m_add(vir_part, vir_con, vir_part);
1638             }
1639             if (debug)
1640             {
1641                 pr_rvecs(debug, 0, "constraint virial", vir_part, DIM);
1642             }
1643         }
1644     }
1645
1646     where();
1647     if ((inputrec->eI == eiSD2) && !(bFirstHalf))
1648     {
1649         xprime = get_xprime(state, upd);
1650
1651         nth = gmx_omp_nthreads_get(emntUpdate);
1652
1653 #pragma omp parallel for num_threads(nth) schedule(static)
1654         for (th = 0; th < nth; th++)
1655         {
1656             int start_th, end_th;
1657
1658             start_th = start + ((nrend-start)* th   )/nth;
1659             end_th   = start + ((nrend-start)*(th+1))/nth;
1660
1661             /* The second part of the SD integration */
1662             do_update_sd2(upd->sd, upd->sd->gaussrand[th],
1663                           FALSE, start_th, end_th,
1664                           inputrec->opts.acc, inputrec->opts.nFreeze,
1665                           md->invmass, md->ptype,
1666                           md->cFREEZE, md->cACC, md->cTC,
1667                           state->x, xprime, state->v, force, state->sd_X,
1668                           inputrec->opts.tau_t,
1669                           FALSE);
1670         }
1671         inc_nrnb(nrnb, eNR_UPDATE, homenr);
1672
1673         if (bDoConstr)
1674         {
1675             /* Constrain the coordinates xprime */
1676             wallcycle_start(wcycle, ewcCONSTR);
1677             constrain(NULL, bLog, bEner, constr, idef,
1678                       inputrec, NULL, cr, step, 1, md,
1679                       state->x, xprime, NULL,
1680                       bMolPBC, state->box,
1681                       state->lambda[efptBONDED], dvdlambda,
1682                       NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
1683             wallcycle_stop(wcycle, ewcCONSTR);
1684         }
1685     }
1686
1687     /* We must always unshift after updating coordinates; if we did not shake
1688        x was shifted in do_force */
1689
1690     if (!(bFirstHalf)) /* in the first half of vv, no shift. */
1691     {
1692         if (graph && (graph->nnodes > 0))
1693         {
1694             unshift_x(graph, state->box, state->x, upd->xp);
1695             if (TRICLINIC(state->box))
1696             {
1697                 inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
1698             }
1699             else
1700             {
1701                 inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
1702             }
1703         }
1704         else
1705         {
1706 #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntUpdate)) schedule(static)
1707             for (i = start; i < nrend; i++)
1708             {
1709                 copy_rvec(upd->xp[i], state->x[i]);
1710             }
1711         }
1712
1713         dump_it_all(fplog, "After unshift",
1714                     state->natoms, state->x, upd->xp, state->v, force);
1715     }
1716 /* ############# END the update of velocities and positions ######### */
1717 }
1718
1719 void update_box(FILE             *fplog,
1720                 gmx_large_int_t   step,
1721                 t_inputrec       *inputrec,  /* input record and box stuff      */
1722                 t_mdatoms        *md,
1723                 t_state          *state,
1724                 t_graph          *graph,
1725                 rvec              force[],   /* forces on home particles */
1726                 matrix           *scale_tot,
1727                 matrix            pcoupl_mu,
1728                 t_nrnb           *nrnb,
1729                 gmx_wallcycle_t   wcycle,
1730                 gmx_update_t      upd,
1731                 gmx_bool          bInitStep,
1732                 gmx_bool          bFirstHalf)
1733 {
1734     gmx_bool             bExtended, bLastStep, bLog = FALSE, bEner = FALSE;
1735     double               dt;
1736     real                 dt_1;
1737     int                  start, homenr, nrend, i, n, m, g;
1738     tensor               vir_con;
1739
1740     start  = md->start;
1741     homenr = md->homenr;
1742     nrend  = start+homenr;
1743
1744     bExtended =
1745         (inputrec->etc == etcNOSEHOOVER) ||
1746         (inputrec->epc == epcPARRINELLORAHMAN) ||
1747         (inputrec->epc == epcMTTK);
1748
1749     dt = inputrec->delta_t;
1750
1751     where();
1752
1753     /* now update boxes */
1754     switch (inputrec->epc)
1755     {
1756         case (epcNO):
1757             break;
1758         case (epcBERENDSEN):
1759             berendsen_pscale(inputrec, pcoupl_mu, state->box, state->box_rel,
1760                              start, homenr, state->x, md->cFREEZE, nrnb);
1761             break;
1762         case (epcPARRINELLORAHMAN):
1763             /* The box velocities were updated in do_pr_pcoupl in the update
1764              * iteration, but we dont change the box vectors until we get here
1765              * since we need to be able to shift/unshift above.
1766              */
1767             for (i = 0; i < DIM; i++)
1768             {
1769                 for (m = 0; m <= i; m++)
1770                 {
1771                     state->box[i][m] += dt*state->boxv[i][m];
1772                 }
1773             }
1774             preserve_box_shape(inputrec, state->box_rel, state->box);
1775
1776             /* Scale the coordinates */
1777             for (n = start; (n < start+homenr); n++)
1778             {
1779                 tmvmul_ur0(pcoupl_mu, state->x[n], state->x[n]);
1780             }
1781             break;
1782         case (epcMTTK):
1783             switch (inputrec->epct)
1784             {
1785                 case (epctISOTROPIC):
1786                     /* DIM * eta = ln V.  so DIM*eta_new = DIM*eta_old + DIM*dt*veta =>
1787                        ln V_new = ln V_old + 3*dt*veta => V_new = V_old*exp(3*dt*veta) =>
1788                        Side length scales as exp(veta*dt) */
1789
1790                     msmul(state->box, exp(state->veta*dt), state->box);
1791
1792                     /* Relate veta to boxv.  veta = d(eta)/dT = (1/DIM)*1/V dV/dT.
1793                        o               If we assume isotropic scaling, and box length scaling
1794                        factor L, then V = L^DIM (det(M)).  So dV/dt = DIM
1795                        L^(DIM-1) dL/dt det(M), and veta = (1/L) dL/dt.  The
1796                        determinant of B is L^DIM det(M), and the determinant
1797                        of dB/dt is (dL/dT)^DIM det (M).  veta will be
1798                        (det(dB/dT)/det(B))^(1/3).  Then since M =
1799                        B_new*(vol_new)^(1/3), dB/dT_new = (veta_new)*B(new). */
1800
1801                     msmul(state->box, state->veta, state->boxv);
1802                     break;
1803                 default:
1804                     break;
1805             }
1806             break;
1807         default:
1808             break;
1809     }
1810
1811     if ((!(IR_NPT_TROTTER(inputrec) || IR_NPH_TROTTER(inputrec))) && scale_tot)
1812     {
1813         /* The transposes of the scaling matrices are stored,
1814          * therefore we need to reverse the order in the multiplication.
1815          */
1816         mmul_ur0(*scale_tot, pcoupl_mu, *scale_tot);
1817     }
1818
1819     if (DEFORM(*inputrec))
1820     {
1821         deform(upd, start, homenr, state->x, state->box, scale_tot, inputrec, step);
1822     }
1823     where();
1824     dump_it_all(fplog, "After update",
1825                 state->natoms, state->x, upd->xp, state->v, force);
1826 }
1827
1828 void update_coords(FILE             *fplog,
1829                    gmx_large_int_t   step,
1830                    t_inputrec       *inputrec,  /* input record and box stuff   */
1831                    t_mdatoms        *md,
1832                    t_state          *state,
1833                    gmx_bool          bMolPBC,
1834                    rvec             *f,    /* forces on home particles */
1835                    gmx_bool          bDoLR,
1836                    rvec             *f_lr,
1837                    t_fcdata         *fcd,
1838                    gmx_ekindata_t   *ekind,
1839                    matrix            M,
1840                    gmx_wallcycle_t   wcycle,
1841                    gmx_update_t      upd,
1842                    gmx_bool          bInitStep,
1843                    int               UpdatePart,
1844                    t_commrec        *cr, /* these shouldn't be here -- need to think about it */
1845                    t_nrnb           *nrnb,
1846                    gmx_constr_t      constr,
1847                    t_idef           *idef)
1848 {
1849     gmx_bool          bNH, bPR, bLastStep, bLog = FALSE, bEner = FALSE;
1850     double            dt, alpha;
1851     real             *imass, *imassin;
1852     rvec             *force;
1853     real              dt_1;
1854     int               start, homenr, nrend, i, j, d, n, m, g;
1855     int               blen0, blen1, iatom, jatom, nshake, nsettle, nconstr, nexpand;
1856     int              *icom = NULL;
1857     tensor            vir_con;
1858     rvec             *vcom, *xcom, *vall, *xall, *xin, *vin, *forcein, *fall, *xpall, *xprimein, *xprime;
1859     int               nth, th;
1860
1861     /* Running the velocity half does nothing except for velocity verlet */
1862     if ((UpdatePart == etrtVELOCITY1 || UpdatePart == etrtVELOCITY2) &&
1863         !EI_VV(inputrec->eI))
1864     {
1865         gmx_incons("update_coords called for velocity without VV integrator");
1866     }
1867
1868     start  = md->start;
1869     homenr = md->homenr;
1870     nrend  = start+homenr;
1871
1872     xprime = get_xprime(state, upd);
1873
1874     dt   = inputrec->delta_t;
1875     dt_1 = 1.0/dt;
1876
1877     /* We need to update the NMR restraint history when time averaging is used */
1878     if (state->flags & (1<<estDISRE_RM3TAV))
1879     {
1880         update_disres_history(fcd, &state->hist);
1881     }
1882     if (state->flags & (1<<estORIRE_DTAV))
1883     {
1884         update_orires_history(fcd, &state->hist);
1885     }
1886
1887
1888     bNH = inputrec->etc == etcNOSEHOOVER;
1889     bPR = ((inputrec->epc == epcPARRINELLORAHMAN) || (inputrec->epc == epcMTTK));
1890
1891     if (bDoLR && inputrec->nstcalclr > 1 && !EI_VV(inputrec->eI))  /* get this working with VV? */
1892     {
1893         /* Store the total force + nstcalclr-1 times the LR force
1894          * in forces_lr, so it can be used in a normal update algorithm
1895          * to produce twin time stepping.
1896          */
1897         /* is this correct in the new construction? MRS */
1898         combine_forces(inputrec->nstcalclr, constr, inputrec, md, idef, cr,
1899                        step, state, bMolPBC,
1900                        start, nrend, f, f_lr, nrnb);
1901         force = f_lr;
1902     }
1903     else
1904     {
1905         force = f;
1906     }
1907
1908     /* ############# START The update of velocities and positions ######### */
1909     where();
1910     dump_it_all(fplog, "Before update",
1911                 state->natoms, state->x, xprime, state->v, force);
1912
1913     if (inputrec->eI == eiSD2)
1914     {
1915         check_sd2_work_data_allocation(upd->sd, nrend);
1916
1917         do_update_sd2_Tconsts(upd->sd,
1918                               inputrec->opts.ngtc,
1919                               inputrec->opts.tau_t,
1920                               inputrec->opts.ref_t);
1921     }
1922     if (inputrec->eI == eiBD)
1923     {
1924         do_update_bd_Tconsts(dt, inputrec->bd_fric,
1925                              inputrec->opts.ngtc, inputrec->opts.ref_t,
1926                              upd->sd->bd_rf);
1927     }
1928
1929     nth = gmx_omp_nthreads_get(emntUpdate);
1930
1931 #pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
1932     for (th = 0; th < nth; th++)
1933     {
1934         int start_th, end_th;
1935
1936         start_th = start + ((nrend-start)* th   )/nth;
1937         end_th   = start + ((nrend-start)*(th+1))/nth;
1938
1939         switch (inputrec->eI)
1940         {
1941             case (eiMD):
1942                 if (ekind->cosacc.cos_accel == 0)
1943                 {
1944                     do_update_md(start_th, end_th, dt,
1945                                  ekind->tcstat, state->nosehoover_vxi,
1946                                  ekind->bNEMD, ekind->grpstat, inputrec->opts.acc,
1947                                  inputrec->opts.nFreeze,
1948                                  md->invmass, md->ptype,
1949                                  md->cFREEZE, md->cACC, md->cTC,
1950                                  state->x, xprime, state->v, force, M,
1951                                  bNH, bPR);
1952                 }
1953                 else
1954                 {
1955                     do_update_visc(start_th, end_th, dt,
1956                                    ekind->tcstat, state->nosehoover_vxi,
1957                                    md->invmass, md->ptype,
1958                                    md->cTC, state->x, xprime, state->v, force, M,
1959                                    state->box,
1960                                    ekind->cosacc.cos_accel,
1961                                    ekind->cosacc.vcos,
1962                                    bNH, bPR);
1963                 }
1964                 break;
1965             case (eiSD1):
1966                 do_update_sd1(upd->sd, upd->sd->gaussrand[th],
1967                               start_th, end_th, dt,
1968                               inputrec->opts.acc, inputrec->opts.nFreeze,
1969                               md->invmass, md->ptype,
1970                               md->cFREEZE, md->cACC, md->cTC,
1971                               state->x, xprime, state->v, force, state->sd_X,
1972                               inputrec->opts.ngtc, inputrec->opts.tau_t, inputrec->opts.ref_t);
1973                 break;
1974             case (eiSD2):
1975                 /* The SD update is done in 2 parts, because an extra constraint step
1976                  * is needed
1977                  */
1978                 do_update_sd2(upd->sd, upd->sd->gaussrand[th],
1979                               bInitStep, start_th, end_th,
1980                               inputrec->opts.acc, inputrec->opts.nFreeze,
1981                               md->invmass, md->ptype,
1982                               md->cFREEZE, md->cACC, md->cTC,
1983                               state->x, xprime, state->v, force, state->sd_X,
1984                               inputrec->opts.tau_t,
1985                               TRUE);
1986                 break;
1987             case (eiBD):
1988                 do_update_bd(start_th, end_th, dt,
1989                              inputrec->opts.nFreeze, md->invmass, md->ptype,
1990                              md->cFREEZE, md->cTC,
1991                              state->x, xprime, state->v, force,
1992                              inputrec->bd_fric,
1993                              upd->sd->bd_rf, upd->sd->gaussrand[th]);
1994                 break;
1995             case (eiVV):
1996             case (eiVVAK):
1997                 alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */
1998                 switch (UpdatePart)
1999                 {
2000                     case etrtVELOCITY1:
2001                     case etrtVELOCITY2:
2002                         do_update_vv_vel(start_th, end_th, dt,
2003                                          ekind->tcstat, ekind->grpstat,
2004                                          inputrec->opts.acc, inputrec->opts.nFreeze,
2005                                          md->invmass, md->ptype,
2006                                          md->cFREEZE, md->cACC,
2007                                          state->v, force,
2008                                          (bNH || bPR), state->veta, alpha);
2009                         break;
2010                     case etrtPOSITION:
2011                         do_update_vv_pos(start_th, end_th, dt,
2012                                          ekind->tcstat, ekind->grpstat,
2013                                          inputrec->opts.acc, inputrec->opts.nFreeze,
2014                                          md->invmass, md->ptype, md->cFREEZE,
2015                                          state->x, xprime, state->v, force,
2016                                          (bNH || bPR), state->veta, alpha);
2017                         break;
2018                 }
2019                 break;
2020             default:
2021                 gmx_fatal(FARGS, "Don't know how to update coordinates");
2022                 break;
2023         }
2024     }
2025
2026 }
2027
2028
2029 void correct_ekin(FILE *log, int start, int end, rvec v[], rvec vcm, real mass[],
2030                   real tmass, tensor ekin)
2031 {
2032     /*
2033      * This is a debugging routine. It should not be called for production code
2034      *
2035      * The kinetic energy should calculated according to:
2036      *   Ekin = 1/2 m (v-vcm)^2
2037      * However the correction is not always applied, since vcm may not be
2038      * known in time and we compute
2039      *   Ekin' = 1/2 m v^2 instead
2040      * This can be corrected afterwards by computing
2041      *   Ekin = Ekin' + 1/2 m ( -2 v vcm + vcm^2)
2042      * or in hsorthand:
2043      *   Ekin = Ekin' - m v vcm + 1/2 m vcm^2
2044      */
2045     int    i, j, k;
2046     real   m, tm;
2047     rvec   hvcm, mv;
2048     tensor dekin;
2049
2050     /* Local particles */
2051     clear_rvec(mv);
2052
2053     /* Processor dependent part. */
2054     tm = 0;
2055     for (i = start; (i < end); i++)
2056     {
2057         m      = mass[i];
2058         tm    += m;
2059         for (j = 0; (j < DIM); j++)
2060         {
2061             mv[j] += m*v[i][j];
2062         }
2063     }
2064     /* Shortcut */
2065     svmul(1/tmass, vcm, vcm);
2066     svmul(0.5, vcm, hvcm);
2067     clear_mat(dekin);
2068     for (j = 0; (j < DIM); j++)
2069     {
2070         for (k = 0; (k < DIM); k++)
2071         {
2072             dekin[j][k] += vcm[k]*(tm*hvcm[j]-mv[j]);
2073         }
2074     }
2075     pr_rvecs(log, 0, "dekin", dekin, DIM);
2076     pr_rvecs(log, 0, " ekin", ekin, DIM);
2077     fprintf(log, "dekin = %g, ekin = %g  vcm = (%8.4f %8.4f %8.4f)\n",
2078             trace(dekin), trace(ekin), vcm[XX], vcm[YY], vcm[ZZ]);
2079     fprintf(log, "mv = (%8.4f %8.4f %8.4f)\n",
2080             mv[XX], mv[YY], mv[ZZ]);
2081 }
2082
2083 extern gmx_bool update_randomize_velocities(t_inputrec *ir, gmx_large_int_t step, t_mdatoms *md, t_state *state, gmx_update_t upd, t_idef *idef, gmx_constr_t constr)
2084 {
2085
2086     int  i;
2087     real rate = (ir->delta_t)/ir->opts.tau_t[0];
2088     /* proceed with andersen if 1) it's fixed probability per
2089        particle andersen or 2) it's massive andersen and it's tau_t/dt */
2090     if ((ir->etc == etcANDERSEN) || do_per_step(step, (int)(1.0/rate)))
2091     {
2092         srenew(upd->randatom, state->nalloc);
2093         srenew(upd->randatom_list, state->nalloc);
2094         if (upd->randatom_list_init == FALSE)
2095         {
2096             for (i = 0; i < state->nalloc; i++)
2097             {
2098                 upd->randatom[i]      = FALSE;
2099                 upd->randatom_list[i] = 0;
2100             }
2101             upd->randatom_list_init = TRUE;
2102         }
2103         andersen_tcoupl(ir, md, state, upd->sd->gaussrand[0], rate,
2104                         (ir->etc == etcANDERSEN) ? idef : NULL,
2105                         constr ? get_nblocks(constr) : 0,
2106                         constr ? get_sblock(constr) : NULL,
2107                         upd->randatom, upd->randatom_list,
2108                         upd->sd->randomize_group, upd->sd->boltzfac);
2109         return TRUE;
2110     }
2111     return FALSE;
2112 }