src/gromacs/mdlib/update.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "update.h"
  40
  41 #include <math.h>
  42 #include <stdio.h>
  43
  44 #include <algorithm>
  45
  46 #include "gromacs/domdec/domdec_struct.h"
  47 #include "gromacs/fileio/confio.h"
  48 #include "gromacs/gmxlib/network.h"
  49 #include "gromacs/gmxlib/nrnb.h"
  50 #include "gromacs/listed-forces/disre.h"
  51 #include "gromacs/listed-forces/orires.h"
  52 #include "gromacs/math/functions.h"
  53 #include "gromacs/math/invertmatrix.h"
  54 #include "gromacs/math/units.h"
  55 #include "gromacs/math/vec.h"
  56 #include "gromacs/math/vecdump.h"
  57 #include "gromacs/mdlib/constr.h"
  58 #include "gromacs/mdlib/force.h"
  59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  60 #include "gromacs/mdlib/mdrun.h"
  61 #include "gromacs/mdlib/sim_util.h"
  62 #include "gromacs/mdlib/tgroup.h"
  63 #include "gromacs/mdtypes/commrec.h"
  64 #include "gromacs/mdtypes/group.h"
  65 #include "gromacs/mdtypes/inputrec.h"
  66 #include "gromacs/mdtypes/md_enums.h"
  67 #include "gromacs/pbcutil/boxutilities.h"
  68 #include "gromacs/pbcutil/mshift.h"
  69 #include "gromacs/pbcutil/pbc.h"
  70 #include "gromacs/pulling/pull.h"
  71 #include "gromacs/random/tabulatednormaldistribution.h"
  72 #include "gromacs/random/threefry.h"
  73 #include "gromacs/timing/wallcycle.h"
  74 #include "gromacs/utility/exceptions.h"
  75 #include "gromacs/utility/fatalerror.h"
  76 #include "gromacs/utility/futil.h"
  77 #include "gromacs/utility/gmxassert.h"
  78 #include "gromacs/utility/gmxomp.h"
  79 #include "gromacs/utility/smalloc.h"
  80
  81 /*For debugging, start at v(-dt/2) for velolcity verlet -- uncomment next line */
  82 /*#define STARTFROMDT2*/
  83
  84 typedef struct {
  85     double em;
  86 } gmx_sd_const_t;
  87
  88 typedef struct {
  89     real V;
  90 } gmx_sd_sigma_t;
  91
  92 typedef struct {
  93     /* BD stuff */
  94     real           *bd_rf;
  95     /* SD stuff */
  96     gmx_sd_const_t *sdc;
  97     gmx_sd_sigma_t *sdsig;
  98     /* andersen temperature control stuff */
  99     gmx_bool       *randomize_group;
 100     real           *boltzfac;
 101 } gmx_stochd_t;
 102
 103 struct gmx_update_t
 104 {
 105     gmx_stochd_t *sd;
 106     /* xprime for constraint algorithms */
 107     rvec         *xp;
 108     int           xp_nalloc;
 109
 110     /* Variables for the deform algorithm */
 111     gmx_int64_t     deformref_step;
 112     matrix          deformref_box;
 113 };
 114
 115
 116 static void do_update_md(int start, int nrend, double dt,
 117                          t_grp_tcstat *tcstat,
 118                          double nh_vxi[],
 119                          gmx_bool bNEMD, t_grp_acc *gstat, rvec accel[],
 120                          ivec nFreeze[],
 121                          real invmass[],
 122                          unsigned short ptype[], unsigned short cFREEZE[],
 123                          unsigned short cACC[], unsigned short cTC[],
 124                          rvec x[], rvec xprime[], rvec v[],
 125                          rvec f[], matrix M,
 126                          gmx_bool bNH, gmx_bool bPR)
 127 {
 128     double imass, w_dt;
 129     int    gf = 0, ga = 0, gt = 0;
 130     rvec   vrel;
 131     real   vn, vv, va, vb, vnrel;
 132     real   lg, vxi = 0, u;
 133     int    n, d;
 134
 135     if (bNH || bPR)
 136     {
 137         /* Update with coupling to extended ensembles, used for
 138          * Nose-Hoover and Parrinello-Rahman coupling
 139          * Nose-Hoover uses the reversible leap-frog integrator from
 140          * Holian et al. Phys Rev E 52(3) : 2338, 1995
 141          */
 142         for (n = start; n < nrend; n++)
 143         {
 144             imass = invmass[n];
 145             if (cFREEZE)
 146             {
 147                 gf   = cFREEZE[n];
 148             }
 149             if (cACC)
 150             {
 151                 ga   = cACC[n];
 152             }
 153             if (cTC)
 154             {
 155                 gt   = cTC[n];
 156             }
 157             lg   = tcstat[gt].lambda;
 158             if (bNH)
 159             {
 160                 vxi   = nh_vxi[gt];
 161             }
 162             rvec_sub(v[n], gstat[ga].u, vrel);
 163
 164             for (d = 0; d < DIM; d++)
 165             {
 166                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 167                 {
 168                     vnrel = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
 169                                               - iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
 170                     /* do not scale the mean velocities u */
 171                     vn             = gstat[ga].u[d] + accel[ga][d]*dt + vnrel;
 172                     v[n][d]        = vn;
 173                     xprime[n][d]   = x[n][d]+vn*dt;
 174                 }
 175                 else
 176                 {
 177                     v[n][d]        = 0.0;
 178                     xprime[n][d]   = x[n][d];
 179                 }
 180             }
 181         }
 182     }
 183     else if (cFREEZE != NULL ||
 184              nFreeze[0][XX] || nFreeze[0][YY] || nFreeze[0][ZZ] ||
 185              bNEMD)
 186     {
 187         /* Update with Berendsen/v-rescale coupling and freeze or NEMD */
 188         for (n = start; n < nrend; n++)
 189         {
 190             w_dt = invmass[n]*dt;
 191             if (cFREEZE)
 192             {
 193                 gf   = cFREEZE[n];
 194             }
 195             if (cACC)
 196             {
 197                 ga   = cACC[n];
 198             }
 199             if (cTC)
 200             {
 201                 gt   = cTC[n];
 202             }
 203             lg   = tcstat[gt].lambda;
 204
 205             for (d = 0; d < DIM; d++)
 206             {
 207                 vn             = v[n][d];
 208                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 209                 {
 210                     vv             = lg*vn + f[n][d]*w_dt;
 211
 212                     /* do not scale the mean velocities u */
 213                     u              = gstat[ga].u[d];
 214                     va             = vv + accel[ga][d]*dt;
 215                     vb             = va + (1.0-lg)*u;
 216                     v[n][d]        = vb;
 217                     xprime[n][d]   = x[n][d]+vb*dt;
 218                 }
 219                 else
 220                 {
 221                     v[n][d]        = 0.0;
 222                     xprime[n][d]   = x[n][d];
 223                 }
 224             }
 225         }
 226     }
 227     else
 228     {
 229         /* Plain update with Berendsen/v-rescale coupling */
 230         for (n = start; n < nrend; n++)
 231         {
 232             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
 233             {
 234                 w_dt = invmass[n]*dt;
 235                 if (cTC)
 236                 {
 237                     gt = cTC[n];
 238                 }
 239                 lg = tcstat[gt].lambda;
 240
 241                 for (d = 0; d < DIM; d++)
 242                 {
 243                     vn           = lg*v[n][d] + f[n][d]*w_dt;
 244                     v[n][d]      = vn;
 245                     xprime[n][d] = x[n][d] + vn*dt;
 246                 }
 247             }
 248             else
 249             {
 250                 for (d = 0; d < DIM; d++)
 251                 {
 252                     v[n][d]        = 0.0;
 253                     xprime[n][d]   = x[n][d];
 254                 }
 255             }
 256         }
 257     }
 258 }
 259
 260 static void do_update_vv_vel(int start, int nrend, double dt,
 261                              rvec accel[], ivec nFreeze[], real invmass[],
 262                              unsigned short ptype[], unsigned short cFREEZE[],
 263                              unsigned short cACC[], rvec v[], rvec f[],
 264                              gmx_bool bExtended, real veta, real alpha)
 265 {
 266     double w_dt;
 267     int    gf = 0, ga = 0;
 268     int    n, d;
 269     double g, mv1, mv2;
 270
 271     if (bExtended)
 272     {
 273         g        = 0.25*dt*veta*alpha;
 274         mv1      = exp(-g);
 275         mv2      = gmx::series_sinhx(g);
 276     }
 277     else
 278     {
 279         mv1      = 1.0;
 280         mv2      = 1.0;
 281     }
 282     for (n = start; n < nrend; n++)
 283     {
 284         w_dt = invmass[n]*dt;
 285         if (cFREEZE)
 286         {
 287             gf   = cFREEZE[n];
 288         }
 289         if (cACC)
 290         {
 291             ga   = cACC[n];
 292         }
 293
 294         for (d = 0; d < DIM; d++)
 295         {
 296             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 297             {
 298                 v[n][d]             = mv1*(mv1*v[n][d] + 0.5*(w_dt*mv2*f[n][d]))+0.5*accel[ga][d]*dt;
 299             }
 300             else
 301             {
 302                 v[n][d]        = 0.0;
 303             }
 304         }
 305     }
 306 } /* do_update_vv_vel */
 307
 308 static void do_update_vv_pos(int start, int nrend, double dt,
 309                              ivec nFreeze[],
 310                              unsigned short ptype[], unsigned short cFREEZE[],
 311                              rvec x[], rvec xprime[], rvec v[],
 312                              gmx_bool bExtended, real veta)
 313 {
 314     int    gf = 0;
 315     int    n, d;
 316     double g, mr1, mr2;
 317
 318     /* Would it make more sense if Parrinello-Rahman was put here? */
 319     if (bExtended)
 320     {
 321         g        = 0.5*dt*veta;
 322         mr1      = exp(g);
 323         mr2      = gmx::series_sinhx(g);
 324     }
 325     else
 326     {
 327         mr1      = 1.0;
 328         mr2      = 1.0;
 329     }
 330
 331     for (n = start; n < nrend; n++)
 332     {
 333
 334         if (cFREEZE)
 335         {
 336             gf   = cFREEZE[n];
 337         }
 338
 339         for (d = 0; d < DIM; d++)
 340         {
 341             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 342             {
 343                 xprime[n][d]   = mr1*(mr1*x[n][d]+mr2*dt*v[n][d]);
 344             }
 345             else
 346             {
 347                 xprime[n][d]   = x[n][d];
 348             }
 349         }
 350     }
 351 } /* do_update_vv_pos */
 352
 353 static void do_update_visc(int start, int nrend, double dt,
 354                            t_grp_tcstat *tcstat,
 355                            double nh_vxi[],
 356                            real invmass[],
 357                            unsigned short ptype[], unsigned short cTC[],
 358                            rvec x[], rvec xprime[], rvec v[],
 359                            rvec f[], matrix M, matrix box, real
 360                            cos_accel, real vcos,
 361                            gmx_bool bNH, gmx_bool bPR)
 362 {
 363     double imass, w_dt;
 364     int    gt = 0;
 365     real   vn, vc;
 366     real   lg, vxi = 0, vv;
 367     real   fac, cosz;
 368     rvec   vrel;
 369     int    n, d;
 370
 371     fac = 2*M_PI/(box[ZZ][ZZ]);
 372
 373     if (bNH || bPR)
 374     {
 375         /* Update with coupling to extended ensembles, used for
 376          * Nose-Hoover and Parrinello-Rahman coupling
 377          */
 378         for (n = start; n < nrend; n++)
 379         {
 380             imass = invmass[n];
 381             if (cTC)
 382             {
 383                 gt   = cTC[n];
 384             }
 385             lg   = tcstat[gt].lambda;
 386             cosz = cos(fac*x[n][ZZ]);
 387
 388             copy_rvec(v[n], vrel);
 389
 390             vc            = cosz*vcos;
 391             vrel[XX]     -= vc;
 392             if (bNH)
 393             {
 394                 vxi        = nh_vxi[gt];
 395             }
 396             for (d = 0; d < DIM; d++)
 397             {
 398                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
 399                 {
 400                     vn  = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
 401                                             - iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
 402                     if (d == XX)
 403                     {
 404                         vn += vc + dt*cosz*cos_accel;
 405                     }
 406                     v[n][d]        = vn;
 407                     xprime[n][d]   = x[n][d]+vn*dt;
 408                 }
 409                 else
 410                 {
 411                     xprime[n][d]   = x[n][d];
 412                 }
 413             }
 414         }
 415     }
 416     else
 417     {
 418         /* Classic version of update, used with berendsen coupling */
 419         for (n = start; n < nrend; n++)
 420         {
 421             w_dt = invmass[n]*dt;
 422             if (cTC)
 423             {
 424                 gt   = cTC[n];
 425             }
 426             lg   = tcstat[gt].lambda;
 427             cosz = cos(fac*x[n][ZZ]);
 428
 429             for (d = 0; d < DIM; d++)
 430             {
 431                 vn             = v[n][d];
 432
 433                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
 434                 {
 435                     if (d == XX)
 436                     {
 437                         vc           = cosz*vcos;
 438                         /* Do not scale the cosine velocity profile */
 439                         vv           = vc + lg*(vn - vc + f[n][d]*w_dt);
 440                         /* Add the cosine accelaration profile */
 441                         vv          += dt*cosz*cos_accel;
 442                     }
 443                     else
 444                     {
 445                         vv           = lg*(vn + f[n][d]*w_dt);
 446                     }
 447                     v[n][d]        = vv;
 448                     xprime[n][d]   = x[n][d]+vv*dt;
 449                 }
 450                 else
 451                 {
 452                     v[n][d]        = 0.0;
 453                     xprime[n][d]   = x[n][d];
 454                 }
 455             }
 456         }
 457     }
 458 }
 459
 460 static gmx_stochd_t *init_stochd(const t_inputrec *ir)
 461 {
 462     gmx_stochd_t   *sd;
 463
 464     snew(sd, 1);
 465
 466     const t_grpopts *opts = &ir->opts;
 467     int              ngtc = opts->ngtc;
 468
 469     if (ir->eI == eiBD)
 470     {
 471         snew(sd->bd_rf, ngtc);
 472     }
 473     else if (EI_SD(ir->eI))
 474     {
 475         snew(sd->sdc, ngtc);
 476         snew(sd->sdsig, ngtc);
 477
 478         gmx_sd_const_t *sdc = sd->sdc;
 479
 480         for (int gt = 0; gt < ngtc; gt++)
 481         {
 482             if (opts->tau_t[gt] > 0)
 483             {
 484                 sdc[gt].em  = exp(-ir->delta_t/opts->tau_t[gt]);
 485             }
 486             else
 487             {
 488                 /* No friction and noise on this group */
 489                 sdc[gt].em  = 1;
 490             }
 491         }
 492     }
 493     else if (ETC_ANDERSEN(ir->etc))
 494     {
 495         snew(sd->randomize_group, ngtc);
 496         snew(sd->boltzfac, ngtc);
 497
 498         /* for now, assume that all groups, if randomized, are randomized at the same rate, i.e. tau_t is the same. */
 499         /* since constraint groups don't necessarily match up with temperature groups! This is checked in readir.c */
 500
 501         for (int gt = 0; gt < ngtc; gt++)
 502         {
 503             real reft = std::max<real>(0, opts->ref_t[gt]);
 504             if ((opts->tau_t[gt] > 0) && (reft > 0))  /* tau_t or ref_t = 0 means that no randomization is done */
 505             {
 506                 sd->randomize_group[gt] = TRUE;
 507                 sd->boltzfac[gt]        = BOLTZ*opts->ref_t[gt];
 508             }
 509             else
 510             {
 511                 sd->randomize_group[gt] = FALSE;
 512             }
 513         }
 514     }
 515
 516     return sd;
 517 }
 518
 519 void update_temperature_constants(gmx_update_t *upd, const t_inputrec *ir)
 520 {
 521     if (ir->eI == eiBD)
 522     {
 523         if (ir->bd_fric != 0)
 524         {
 525             for (int gt = 0; gt < ir->opts.ngtc; gt++)
 526             {
 527                 upd->sd->bd_rf[gt] = std::sqrt(2.0*BOLTZ*ir->opts.ref_t[gt]/(ir->bd_fric*ir->delta_t));
 528             }
 529         }
 530         else
 531         {
 532             for (int gt = 0; gt < ir->opts.ngtc; gt++)
 533             {
 534                 upd->sd->bd_rf[gt] = std::sqrt(2.0*BOLTZ*ir->opts.ref_t[gt]);
 535             }
 536         }
 537     }
 538     if (ir->eI == eiSD1)
 539     {
 540         for (int gt = 0; gt < ir->opts.ngtc; gt++)
 541         {
 542             real kT = BOLTZ*ir->opts.ref_t[gt];
 543             /* The mass is accounted for later, since this differs per atom */
 544             upd->sd->sdsig[gt].V  = std::sqrt(kT*(1 - upd->sd->sdc[gt].em*upd->sd->sdc[gt].em));
 545         }
 546     }
 547 }
 548
 549 gmx_update_t *init_update(const t_inputrec *ir)
 550 {
 551     gmx_update_t *upd;
 552
 553     snew(upd, 1);
 554
 555     if (ir->eI == eiBD || EI_SD(ir->eI) || ir->etc == etcVRESCALE || ETC_ANDERSEN(ir->etc))
 556     {
 557         upd->sd    = init_stochd(ir);
 558     }
 559
 560     update_temperature_constants(upd, ir);
 561
 562     upd->xp        = NULL;
 563     upd->xp_nalloc = 0;
 564
 565     return upd;
 566 }
 567
 568 void update_realloc(gmx_update_t *upd, int state_nalloc)
 569 {
 570     GMX_ASSERT(upd, "upd must be allocated before its fields can be reallocated");
 571     if (state_nalloc > upd->xp_nalloc)
 572     {
 573         upd->xp_nalloc = state_nalloc;
 574         /* We need to allocate one element extra, since we might use
 575          * (unaligned) 4-wide SIMD loads to access rvec entries. */
 576         srenew(upd->xp, upd->xp_nalloc + 1);
 577     }
 578 }
 579
 580 static void do_update_sd1(gmx_stochd_t *sd,
 581                           int start, int nrend, double dt,
 582                           rvec accel[], ivec nFreeze[],
 583                           real invmass[], unsigned short ptype[],
 584                           unsigned short cFREEZE[], unsigned short cACC[],
 585                           unsigned short cTC[],
 586                           rvec x[], rvec xprime[], rvec v[], rvec f[],
 587                           gmx_bool bDoConstr,
 588                           gmx_bool bFirstHalfConstr,
 589                           gmx_int64_t step, int seed, int* gatindex)
 590 {
 591     gmx_sd_const_t *sdc;
 592     gmx_sd_sigma_t *sig;
 593     int             gf = 0, ga = 0, gt = 0;
 594     real            ism;
 595     int             n, d;
 596
 597     // Even 0 bits internal counter gives 2x64 ints (more than enough for three table lookups)
 598     gmx::ThreeFry2x64<0> rng(seed, gmx::RandomDomain::UpdateCoordinates);
 599     gmx::TabulatedNormalDistribution<real, 14> dist;
 600
 601     sdc = sd->sdc;
 602     sig = sd->sdsig;
 603
 604     if (!bDoConstr)
 605     {
 606         for (n = start; n < nrend; n++)
 607         {
 608             int  ng = gatindex ? gatindex[n] : n;
 609
 610             rng.restart(step, ng);
 611             dist.reset();
 612
 613             ism = std::sqrt(invmass[n]);
 614
 615             if (cFREEZE)
 616             {
 617                 gf  = cFREEZE[n];
 618             }
 619             if (cACC)
 620             {
 621                 ga  = cACC[n];
 622             }
 623             if (cTC)
 624             {
 625                 gt  = cTC[n];
 626             }
 627
 628             for (d = 0; d < DIM; d++)
 629             {
 630                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 631                 {
 632                     real sd_V, vn;
 633
 634                     sd_V         = ism*sig[gt].V*dist(rng);
 635                     vn           = v[n][d] + (invmass[n]*f[n][d] + accel[ga][d])*dt;
 636                     v[n][d]      = vn*sdc[gt].em + sd_V;
 637                     /* Here we include half of the friction+noise
 638                      * update of v into the integration of x.
 639                      */
 640                     xprime[n][d] = x[n][d] + 0.5*(vn + v[n][d])*dt;
 641                 }
 642                 else
 643                 {
 644                     v[n][d]      = 0.0;
 645                     xprime[n][d] = x[n][d];
 646                 }
 647             }
 648         }
 649     }
 650     else
 651     {
 652         /* We do have constraints */
 653         if (bFirstHalfConstr)
 654         {
 655             /* First update without friction and noise */
 656             real im;
 657
 658             for (n = start; n < nrend; n++)
 659             {
 660                 im = invmass[n];
 661
 662                 if (cFREEZE)
 663                 {
 664                     gf  = cFREEZE[n];
 665                 }
 666                 if (cACC)
 667                 {
 668                     ga  = cACC[n];
 669                 }
 670
 671                 for (d = 0; d < DIM; d++)
 672                 {
 673                     if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 674                     {
 675                         v[n][d]      = v[n][d] + (im*f[n][d] + accel[ga][d])*dt;
 676                         xprime[n][d] = x[n][d] +  v[n][d]*dt;
 677                     }
 678                     else
 679                     {
 680                         v[n][d]      = 0.0;
 681                         xprime[n][d] = x[n][d];
 682                     }
 683                 }
 684             }
 685         }
 686         else
 687         {
 688             /* Update friction and noise only */
 689             for (n = start; n < nrend; n++)
 690             {
 691                 int  ng = gatindex ? gatindex[n] : n;
 692
 693                 rng.restart(step, ng);
 694                 dist.reset();
 695
 696                 ism = std::sqrt(invmass[n]);
 697
 698                 if (cFREEZE)
 699                 {
 700                     gf  = cFREEZE[n];
 701                 }
 702                 if (cTC)
 703                 {
 704                     gt  = cTC[n];
 705                 }
 706
 707                 for (d = 0; d < DIM; d++)
 708                 {
 709                     if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 710                     {
 711                         real sd_V, vn;
 712
 713                         sd_V         = ism*sig[gt].V*dist(rng);
 714                         vn           = v[n][d];
 715                         v[n][d]      = vn*sdc[gt].em + sd_V;
 716                         /* Add the friction and noise contribution only */
 717                         xprime[n][d] = xprime[n][d] + 0.5*(v[n][d] - vn)*dt;
 718                     }
 719                 }
 720             }
 721         }
 722     }
 723 }
 724
 725 static void do_update_bd(int start, int nrend, double dt,
 726                          ivec nFreeze[],
 727                          real invmass[], unsigned short ptype[],
 728                          unsigned short cFREEZE[], unsigned short cTC[],
 729                          rvec x[], rvec xprime[], rvec v[],
 730                          rvec f[], real friction_coefficient,
 731                          real *rf, gmx_int64_t step, int seed,
 732                          int* gatindex)
 733 {
 734     /* note -- these appear to be full step velocities . . .  */
 735     int    gf = 0, gt = 0;
 736     real   vn;
 737     real   invfr = 0;
 738     int    n, d;
 739     // Use 1 bit of internal counters to give us 2*2 64-bits values per stream
 740     // Each 64-bit value is enough for 4 normal distribution table numbers.
 741     gmx::ThreeFry2x64<0> rng(seed, gmx::RandomDomain::UpdateCoordinates);
 742     gmx::TabulatedNormalDistribution<real, 14> dist;
 743
 744     if (friction_coefficient != 0)
 745     {
 746         invfr = 1.0/friction_coefficient;
 747     }
 748
 749     for (n = start; (n < nrend); n++)
 750     {
 751         int  ng  = gatindex ? gatindex[n] : n;
 752
 753         rng.restart(step, ng);
 754         dist.reset();
 755
 756         if (cFREEZE)
 757         {
 758             gf = cFREEZE[n];
 759         }
 760         if (cTC)
 761         {
 762             gt = cTC[n];
 763         }
 764         for (d = 0; (d < DIM); d++)
 765         {
 766             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 767             {
 768                 if (friction_coefficient != 0)
 769                 {
 770                     vn = invfr*f[n][d] + rf[gt]*dist(rng);
 771                 }
 772                 else
 773                 {
 774                     /* NOTE: invmass = 2/(mass*friction_constant*dt) */
 775                     vn = 0.5*invmass[n]*f[n][d]*dt
 776                         + std::sqrt(0.5*invmass[n])*rf[gt]*dist(rng);
 777                 }
 778
 779                 v[n][d]      = vn;
 780                 xprime[n][d] = x[n][d]+vn*dt;
 781             }
 782             else
 783             {
 784                 v[n][d]      = 0.0;
 785                 xprime[n][d] = x[n][d];
 786             }
 787         }
 788     }
 789 }
 790
 791 static void dump_it_all(FILE gmx_unused *fp, const char gmx_unused *title,
 792                         int gmx_unused natoms, rvec gmx_unused x[], rvec gmx_unused xp[],
 793                         rvec gmx_unused v[], rvec gmx_unused f[])
 794 {
 795 #ifdef DEBUG
 796     if (fp)
 797     {
 798         fprintf(fp, "%s\n", title);
 799         pr_rvecs(fp, 0, "x", x, natoms);
 800         pr_rvecs(fp, 0, "xp", xp, natoms);
 801         pr_rvecs(fp, 0, "v", v, natoms);
 802         pr_rvecs(fp, 0, "f", f, natoms);
 803     }
 804 #endif
 805 }
 806
 807 static void calc_ke_part_normal(rvec v[], t_grpopts *opts, t_mdatoms *md,
 808                                 gmx_ekindata_t *ekind, t_nrnb *nrnb, gmx_bool bEkinAveVel)
 809 {
 810     int           g;
 811     t_grp_tcstat *tcstat  = ekind->tcstat;
 812     t_grp_acc    *grpstat = ekind->grpstat;
 813     int           nthread, thread;
 814
 815     /* three main: VV with AveVel, vv with AveEkin, leap with AveEkin.  Leap with AveVel is also
 816        an option, but not supported now.
 817        bEkinAveVel: If TRUE, we sum into ekin, if FALSE, into ekinh.
 818      */
 819
 820     /* group velocities are calculated in update_ekindata and
 821      * accumulated in acumulate_groups.
 822      * Now the partial global and groups ekin.
 823      */
 824     for (g = 0; (g < opts->ngtc); g++)
 825     {
 826         copy_mat(tcstat[g].ekinh, tcstat[g].ekinh_old);
 827         if (bEkinAveVel)
 828         {
 829             clear_mat(tcstat[g].ekinf);
 830             tcstat[g].ekinscalef_nhc = 1.0;   /* need to clear this -- logic is complicated! */
 831         }
 832         else
 833         {
 834             clear_mat(tcstat[g].ekinh);
 835         }
 836     }
 837     ekind->dekindl_old = ekind->dekindl;
 838     nthread            = gmx_omp_nthreads_get(emntUpdate);
 839
 840 #pragma omp parallel for num_threads(nthread) schedule(static)
 841     for (thread = 0; thread < nthread; thread++)
 842     {
 843         // This OpenMP only loops over arrays and does not call any functions
 844         // or memory allocation. It should not be able to throw, so for now
 845         // we do not need a try/catch wrapper.
 846         int     start_t, end_t, n;
 847         int     ga, gt;
 848         rvec    v_corrt;
 849         real    hm;
 850         int     d, m;
 851         matrix *ekin_sum;
 852         real   *dekindl_sum;
 853
 854         start_t = ((thread+0)*md->homenr)/nthread;
 855         end_t   = ((thread+1)*md->homenr)/nthread;
 856
 857         ekin_sum    = ekind->ekin_work[thread];
 858         dekindl_sum = ekind->dekindl_work[thread];
 859
 860         for (gt = 0; gt < opts->ngtc; gt++)
 861         {
 862             clear_mat(ekin_sum[gt]);
 863         }
 864         *dekindl_sum = 0.0;
 865
 866         ga = 0;
 867         gt = 0;
 868         for (n = start_t; n < end_t; n++)
 869         {
 870             if (md->cACC)
 871             {
 872                 ga = md->cACC[n];
 873             }
 874             if (md->cTC)
 875             {
 876                 gt = md->cTC[n];
 877             }
 878             hm   = 0.5*md->massT[n];
 879
 880             for (d = 0; (d < DIM); d++)
 881             {
 882                 v_corrt[d]  = v[n][d]  - grpstat[ga].u[d];
 883             }
 884             for (d = 0; (d < DIM); d++)
 885             {
 886                 for (m = 0; (m < DIM); m++)
 887                 {
 888                     /* if we're computing a full step velocity, v_corrt[d] has v(t).  Otherwise, v(t+dt/2) */
 889                     ekin_sum[gt][m][d] += hm*v_corrt[m]*v_corrt[d];
 890                 }
 891             }
 892             if (md->nMassPerturbed && md->bPerturbed[n])
 893             {
 894                 *dekindl_sum +=
 895                     0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt, v_corrt);
 896             }
 897         }
 898     }
 899
 900     ekind->dekindl = 0;
 901     for (thread = 0; thread < nthread; thread++)
 902     {
 903         for (g = 0; g < opts->ngtc; g++)
 904         {
 905             if (bEkinAveVel)
 906             {
 907                 m_add(tcstat[g].ekinf, ekind->ekin_work[thread][g],
 908                       tcstat[g].ekinf);
 909             }
 910             else
 911             {
 912                 m_add(tcstat[g].ekinh, ekind->ekin_work[thread][g],
 913                       tcstat[g].ekinh);
 914             }
 915         }
 916
 917         ekind->dekindl += *ekind->dekindl_work[thread];
 918     }
 919
 920     inc_nrnb(nrnb, eNR_EKIN, md->homenr);
 921 }
 922
 923 static void calc_ke_part_visc(matrix box, rvec x[], rvec v[],
 924                               t_grpopts *opts, t_mdatoms *md,
 925                               gmx_ekindata_t *ekind,
 926                               t_nrnb *nrnb, gmx_bool bEkinAveVel)
 927 {
 928     int           start = 0, homenr = md->homenr;
 929     int           g, d, n, m, gt = 0;
 930     rvec          v_corrt;
 931     real          hm;
 932     t_grp_tcstat *tcstat = ekind->tcstat;
 933     t_cos_acc    *cosacc = &(ekind->cosacc);
 934     real          dekindl;
 935     real          fac, cosz;
 936     double        mvcos;
 937
 938     for (g = 0; g < opts->ngtc; g++)
 939     {
 940         copy_mat(ekind->tcstat[g].ekinh, ekind->tcstat[g].ekinh_old);
 941         clear_mat(ekind->tcstat[g].ekinh);
 942     }
 943     ekind->dekindl_old = ekind->dekindl;
 944
 945     fac     = 2*M_PI/box[ZZ][ZZ];
 946     mvcos   = 0;
 947     dekindl = 0;
 948     for (n = start; n < start+homenr; n++)
 949     {
 950         if (md->cTC)
 951         {
 952             gt = md->cTC[n];
 953         }
 954         hm   = 0.5*md->massT[n];
 955
 956         /* Note that the times of x and v differ by half a step */
 957         /* MRS -- would have to be changed for VV */
 958         cosz         = cos(fac*x[n][ZZ]);
 959         /* Calculate the amplitude of the new velocity profile */
 960         mvcos       += 2*cosz*md->massT[n]*v[n][XX];
 961
 962         copy_rvec(v[n], v_corrt);
 963         /* Subtract the profile for the kinetic energy */
 964         v_corrt[XX] -= cosz*cosacc->vcos;
 965         for (d = 0; (d < DIM); d++)
 966         {
 967             for (m = 0; (m < DIM); m++)
 968             {
 969                 /* if we're computing a full step velocity, v_corrt[d] has v(t).  Otherwise, v(t+dt/2) */
 970                 if (bEkinAveVel)
 971                 {
 972                     tcstat[gt].ekinf[m][d] += hm*v_corrt[m]*v_corrt[d];
 973                 }
 974                 else
 975                 {
 976                     tcstat[gt].ekinh[m][d] += hm*v_corrt[m]*v_corrt[d];
 977                 }
 978             }
 979         }
 980         if (md->nPerturbed && md->bPerturbed[n])
 981         {
 982             /* The minus sign here might be confusing.
 983              * The kinetic contribution from dH/dl doesn't come from
 984              * d m(l)/2 v^2 / dl, but rather from d p^2/2m(l) / dl,
 985              * where p are the momenta. The difference is only a minus sign.
 986              */
 987             dekindl -= 0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt, v_corrt);
 988         }
 989     }
 990     ekind->dekindl = dekindl;
 991     cosacc->mvcos  = mvcos;
 992
 993     inc_nrnb(nrnb, eNR_EKIN, homenr);
 994 }
 995
 996 void calc_ke_part(t_state *state, t_grpopts *opts, t_mdatoms *md,
 997                   gmx_ekindata_t *ekind, t_nrnb *nrnb, gmx_bool bEkinAveVel)
 998 {
 999     if (ekind->cosacc.cos_accel == 0)
1000     {
1001         calc_ke_part_normal(state->v, opts, md, ekind, nrnb, bEkinAveVel);
1002     }
1003     else
1004     {
1005         calc_ke_part_visc(state->box, state->x, state->v, opts, md, ekind, nrnb, bEkinAveVel);
1006     }
1007 }
1008
1009 extern void init_ekinstate(ekinstate_t *ekinstate, const t_inputrec *ir)
1010 {
1011     ekinstate->ekin_n = ir->opts.ngtc;
1012     snew(ekinstate->ekinh, ekinstate->ekin_n);
1013     snew(ekinstate->ekinf, ekinstate->ekin_n);
1014     snew(ekinstate->ekinh_old, ekinstate->ekin_n);
1015     snew(ekinstate->ekinscalef_nhc, ekinstate->ekin_n);
1016     snew(ekinstate->ekinscaleh_nhc, ekinstate->ekin_n);
1017     snew(ekinstate->vscale_nhc, ekinstate->ekin_n);
1018     ekinstate->dekindl = 0;
1019     ekinstate->mvcos   = 0;
1020 }
1021
1022 void update_ekinstate(ekinstate_t *ekinstate, gmx_ekindata_t *ekind)
1023 {
1024     int i;
1025
1026     for (i = 0; i < ekinstate->ekin_n; i++)
1027     {
1028         copy_mat(ekind->tcstat[i].ekinh, ekinstate->ekinh[i]);
1029         copy_mat(ekind->tcstat[i].ekinf, ekinstate->ekinf[i]);
1030         copy_mat(ekind->tcstat[i].ekinh_old, ekinstate->ekinh_old[i]);
1031         ekinstate->ekinscalef_nhc[i] = ekind->tcstat[i].ekinscalef_nhc;
1032         ekinstate->ekinscaleh_nhc[i] = ekind->tcstat[i].ekinscaleh_nhc;
1033         ekinstate->vscale_nhc[i]     = ekind->tcstat[i].vscale_nhc;
1034     }
1035
1036     copy_mat(ekind->ekin, ekinstate->ekin_total);
1037     ekinstate->dekindl = ekind->dekindl;
1038     ekinstate->mvcos   = ekind->cosacc.mvcos;
1039
1040 }
1041
1042 void restore_ekinstate_from_state(t_commrec *cr,
1043                                   gmx_ekindata_t *ekind, const ekinstate_t *ekinstate)
1044 {
1045     int i, n;
1046
1047     if (MASTER(cr))
1048     {
1049         for (i = 0; i < ekinstate->ekin_n; i++)
1050         {
1051             copy_mat(ekinstate->ekinh[i], ekind->tcstat[i].ekinh);
1052             copy_mat(ekinstate->ekinf[i], ekind->tcstat[i].ekinf);
1053             copy_mat(ekinstate->ekinh_old[i], ekind->tcstat[i].ekinh_old);
1054             ekind->tcstat[i].ekinscalef_nhc = ekinstate->ekinscalef_nhc[i];
1055             ekind->tcstat[i].ekinscaleh_nhc = ekinstate->ekinscaleh_nhc[i];
1056             ekind->tcstat[i].vscale_nhc     = ekinstate->vscale_nhc[i];
1057         }
1058
1059         copy_mat(ekinstate->ekin_total, ekind->ekin);
1060
1061         ekind->dekindl      = ekinstate->dekindl;
1062         ekind->cosacc.mvcos = ekinstate->mvcos;
1063         n                   = ekinstate->ekin_n;
1064     }
1065
1066     if (PAR(cr))
1067     {
1068         gmx_bcast(sizeof(n), &n, cr);
1069         for (i = 0; i < n; i++)
1070         {
1071             gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinh[0][0]),
1072                       ekind->tcstat[i].ekinh[0], cr);
1073             gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinf[0][0]),
1074                       ekind->tcstat[i].ekinf[0], cr);
1075             gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinh_old[0][0]),
1076                       ekind->tcstat[i].ekinh_old[0], cr);
1077
1078             gmx_bcast(sizeof(ekind->tcstat[i].ekinscalef_nhc),
1079                       &(ekind->tcstat[i].ekinscalef_nhc), cr);
1080             gmx_bcast(sizeof(ekind->tcstat[i].ekinscaleh_nhc),
1081                       &(ekind->tcstat[i].ekinscaleh_nhc), cr);
1082             gmx_bcast(sizeof(ekind->tcstat[i].vscale_nhc),
1083                       &(ekind->tcstat[i].vscale_nhc), cr);
1084         }
1085         gmx_bcast(DIM*DIM*sizeof(ekind->ekin[0][0]),
1086                   ekind->ekin[0], cr);
1087
1088         gmx_bcast(sizeof(ekind->dekindl), &ekind->dekindl, cr);
1089         gmx_bcast(sizeof(ekind->cosacc.mvcos), &ekind->cosacc.mvcos, cr);
1090     }
1091 }
1092
1093 void set_deform_reference_box(gmx_update_t *upd, gmx_int64_t step, matrix box)
1094 {
1095     upd->deformref_step = step;
1096     copy_mat(box, upd->deformref_box);
1097 }
1098
1099 static void deform(gmx_update_t *upd,
1100                    int start, int homenr, rvec x[], matrix box,
1101                    const t_inputrec *ir, gmx_int64_t step)
1102 {
1103     matrix bnew, invbox, mu;
1104     real   elapsed_time;
1105     int    i, j;
1106
1107     elapsed_time = (step + 1 - upd->deformref_step)*ir->delta_t;
1108     copy_mat(box, bnew);
1109     for (i = 0; i < DIM; i++)
1110     {
1111         for (j = 0; j < DIM; j++)
1112         {
1113             if (ir->deform[i][j] != 0)
1114             {
1115                 bnew[i][j] =
1116                     upd->deformref_box[i][j] + elapsed_time*ir->deform[i][j];
1117             }
1118         }
1119     }
1120     /* We correct the off-diagonal elements,
1121      * which can grow indefinitely during shearing,
1122      * so the shifts do not get messed up.
1123      */
1124     for (i = 1; i < DIM; i++)
1125     {
1126         for (j = i-1; j >= 0; j--)
1127         {
1128             while (bnew[i][j] - box[i][j] > 0.5*bnew[j][j])
1129             {
1130                 rvec_dec(bnew[i], bnew[j]);
1131             }
1132             while (bnew[i][j] - box[i][j] < -0.5*bnew[j][j])
1133             {
1134                 rvec_inc(bnew[i], bnew[j]);
1135             }
1136         }
1137     }
1138     gmx::invertBoxMatrix(box, invbox);
1139     copy_mat(bnew, box);
1140     mmul_ur0(box, invbox, mu);
1141
1142     for (i = start; i < start+homenr; i++)
1143     {
1144         x[i][XX] = mu[XX][XX]*x[i][XX]+mu[YY][XX]*x[i][YY]+mu[ZZ][XX]*x[i][ZZ];
1145         x[i][YY] = mu[YY][YY]*x[i][YY]+mu[ZZ][YY]*x[i][ZZ];
1146         x[i][ZZ] = mu[ZZ][ZZ]*x[i][ZZ];
1147     }
1148 }
1149
1150 void update_tcouple(gmx_int64_t       step,
1151                     t_inputrec       *inputrec,
1152                     t_state          *state,
1153                     gmx_ekindata_t   *ekind,
1154                     t_extmass        *MassQ,
1155                     t_mdatoms        *md)
1156
1157 {
1158     gmx_bool   bTCouple = FALSE;
1159     real       dttc;
1160     int        i, offset;
1161
1162     /* if using vv with trotter decomposition methods, we do this elsewhere in the code */
1163     if (inputrec->etc != etcNO &&
1164         !(inputrecNvtTrotter(inputrec) || inputrecNptTrotter(inputrec) || inputrecNphTrotter(inputrec)))
1165     {
1166         /* We should only couple after a step where energies were determined (for leapfrog versions)
1167            or the step energies are determined, for velocity verlet versions */
1168
1169         if (EI_VV(inputrec->eI))
1170         {
1171             offset = 0;
1172         }
1173         else
1174         {
1175             offset = 1;
1176         }
1177         bTCouple = (inputrec->nsttcouple == 1 ||
1178                     do_per_step(step+inputrec->nsttcouple-offset,
1179                                 inputrec->nsttcouple));
1180     }
1181
1182     if (bTCouple)
1183     {
1184         dttc = inputrec->nsttcouple*inputrec->delta_t;
1185
1186         switch (inputrec->etc)
1187         {
1188             case etcNO:
1189                 break;
1190             case etcBERENDSEN:
1191                 berendsen_tcoupl(inputrec, ekind, dttc);
1192                 break;
1193             case etcNOSEHOOVER:
1194                 nosehoover_tcoupl(&(inputrec->opts), ekind, dttc,
1195                                   state->nosehoover_xi, state->nosehoover_vxi, MassQ);
1196                 break;
1197             case etcVRESCALE:
1198                 vrescale_tcoupl(inputrec, step, ekind, dttc,
1199                                 state->therm_integral);
1200                 break;
1201         }
1202         /* rescale in place here */
1203         if (EI_VV(inputrec->eI))
1204         {
1205             rescale_velocities(ekind, md, 0, md->homenr, state->v);
1206         }
1207     }
1208     else
1209     {
1210         /* Set the T scaling lambda to 1 to have no scaling */
1211         for (i = 0; (i < inputrec->opts.ngtc); i++)
1212         {
1213             ekind->tcstat[i].lambda = 1.0;
1214         }
1215     }
1216 }
1217
1218 void update_pcouple(FILE             *fplog,
1219                     gmx_int64_t       step,
1220                     t_inputrec       *inputrec,
1221                     t_state          *state,
1222                     matrix            pcoupl_mu,
1223                     matrix            M,
1224                     gmx_bool          bInitStep)
1225 {
1226     gmx_bool   bPCouple = FALSE;
1227     real       dtpc     = 0;
1228     int        i;
1229
1230     /* if using Trotter pressure, we do this in coupling.c, so we leave it false. */
1231     if (inputrec->epc != epcNO && (!(inputrecNptTrotter(inputrec) || inputrecNphTrotter(inputrec))))
1232     {
1233         /* We should only couple after a step where energies were determined */
1234         bPCouple = (inputrec->nstpcouple == 1 ||
1235                     do_per_step(step+inputrec->nstpcouple-1,
1236                                 inputrec->nstpcouple));
1237     }
1238
1239     clear_mat(pcoupl_mu);
1240     for (i = 0; i < DIM; i++)
1241     {
1242         pcoupl_mu[i][i] = 1.0;
1243     }
1244
1245     clear_mat(M);
1246
1247     if (bPCouple)
1248     {
1249         dtpc = inputrec->nstpcouple*inputrec->delta_t;
1250
1251         switch (inputrec->epc)
1252         {
1253             /* We can always pcoupl, even if we did not sum the energies
1254              * the previous step, since state->pres_prev is only updated
1255              * when the energies have been summed.
1256              */
1257             case (epcNO):
1258                 break;
1259             case (epcBERENDSEN):
1260                 if (!bInitStep)
1261                 {
1262                     berendsen_pcoupl(fplog, step, inputrec, dtpc, state->pres_prev, state->box,
1263                                      pcoupl_mu);
1264                 }
1265                 break;
1266             case (epcPARRINELLORAHMAN):
1267                 parrinellorahman_pcoupl(fplog, step, inputrec, dtpc, state->pres_prev,
1268                                         state->box, state->box_rel, state->boxv,
1269                                         M, pcoupl_mu, bInitStep);
1270                 break;
1271             default:
1272                 break;
1273         }
1274     }
1275 }
1276
1277 void update_constraints(FILE             *fplog,
1278                         gmx_int64_t       step,
1279                         real             *dvdlambda, /* the contribution to be added to the bonded interactions */
1280                         t_inputrec       *inputrec,  /* input record and box stuff      */
1281                         t_mdatoms        *md,
1282                         t_state          *state,
1283                         gmx_bool          bMolPBC,
1284                         t_graph          *graph,
1285                         rvec              force[],   /* forces on home particles */
1286                         t_idef           *idef,
1287                         tensor            vir_part,
1288                         t_commrec        *cr,
1289                         t_nrnb           *nrnb,
1290                         gmx_wallcycle_t   wcycle,
1291                         gmx_update_t     *upd,
1292                         gmx_constr_t      constr,
1293                         gmx_bool          bFirstHalf,
1294                         gmx_bool          bCalcVir)
1295 {
1296     gmx_bool             bLastStep, bLog = FALSE, bEner = FALSE, bDoConstr = FALSE;
1297     double               dt;
1298     int                  start, homenr, nrend, i;
1299     tensor               vir_con;
1300     int                  nth, th;
1301
1302     if (constr)
1303     {
1304         bDoConstr = TRUE;
1305     }
1306     if (bFirstHalf && !EI_VV(inputrec->eI))
1307     {
1308         bDoConstr = FALSE;
1309     }
1310
1311     /* for now, SD update is here -- though it really seems like it
1312        should be reformulated as a velocity verlet method, since it has two parts */
1313
1314     start  = 0;
1315     homenr = md->homenr;
1316     nrend  = start+homenr;
1317
1318     dt   = inputrec->delta_t;
1319
1320     /*
1321      *  Steps (7C, 8C)
1322      *  APPLY CONSTRAINTS:
1323      *  BLOCK SHAKE
1324
1325      * When doing PR pressure coupling we have to constrain the
1326      * bonds in each iteration. If we are only using Nose-Hoover tcoupling
1327      * it is enough to do this once though, since the relative velocities
1328      * after this will be normal to the bond vector
1329      */
1330
1331     if (bDoConstr)
1332     {
1333         /* clear out constraints before applying */
1334         clear_mat(vir_part);
1335
1336         bLastStep = (step == inputrec->init_step+inputrec->nsteps);
1337         bLog      = (do_per_step(step, inputrec->nstlog) || bLastStep || (step < 0));
1338         bEner     = (do_per_step(step, inputrec->nstenergy) || bLastStep);
1339         /* Constrain the coordinates upd->xp */
1340         wallcycle_start(wcycle, ewcCONSTR);
1341         if (EI_VV(inputrec->eI) && bFirstHalf)
1342         {
1343             constrain(NULL, bLog, bEner, constr, idef,
1344                       inputrec, cr, step, 1, 1.0, md,
1345                       state->x, state->v, state->v,
1346                       bMolPBC, state->box,
1347                       state->lambda[efptBONDED], dvdlambda,
1348                       NULL, bCalcVir ? &vir_con : NULL, nrnb, econqVeloc);
1349         }
1350         else
1351         {
1352             constrain(NULL, bLog, bEner, constr, idef,
1353                       inputrec, cr, step, 1, 1.0, md,
1354                       state->x, upd->xp, NULL,
1355                       bMolPBC, state->box,
1356                       state->lambda[efptBONDED], dvdlambda,
1357                       state->v, bCalcVir ? &vir_con : NULL, nrnb, econqCoord);
1358         }
1359         wallcycle_stop(wcycle, ewcCONSTR);
1360
1361         where();
1362
1363         dump_it_all(fplog, "After Shake",
1364                     state->natoms, state->x, upd->xp, state->v, force);
1365
1366         if (bCalcVir)
1367         {
1368             m_add(vir_part, vir_con, vir_part);
1369             if (debug)
1370             {
1371                 pr_rvecs(debug, 0, "constraint virial", vir_part, DIM);
1372             }
1373         }
1374     }
1375
1376     where();
1377
1378     if (inputrec->eI == eiSD1 && bDoConstr && !bFirstHalf)
1379     {
1380         wallcycle_start(wcycle, ewcUPDATE);
1381
1382         nth = gmx_omp_nthreads_get(emntUpdate);
1383
1384 #pragma omp parallel for num_threads(nth) schedule(static)
1385         for (th = 0; th < nth; th++)
1386         {
1387             try
1388             {
1389                 int start_th, end_th;
1390
1391                 start_th = start + ((nrend-start)* th   )/nth;
1392                 end_th   = start + ((nrend-start)*(th+1))/nth;
1393
1394                 /* The second part of the SD integration */
1395                 do_update_sd1(upd->sd,
1396                               start_th, end_th, dt,
1397                               inputrec->opts.acc, inputrec->opts.nFreeze,
1398                               md->invmass, md->ptype,
1399                               md->cFREEZE, md->cACC, md->cTC,
1400                               state->x, upd->xp, state->v, force,
1401                               bDoConstr, FALSE,
1402                               step, inputrec->ld_seed,
1403                               DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
1404             }
1405             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1406         }
1407         inc_nrnb(nrnb, eNR_UPDATE, homenr);
1408         wallcycle_stop(wcycle, ewcUPDATE);
1409
1410         if (bDoConstr)
1411         {
1412             /* Constrain the coordinates upd->xp for half a time step */
1413             wallcycle_start(wcycle, ewcCONSTR);
1414
1415             constrain(NULL, bLog, bEner, constr, idef,
1416                       inputrec, cr, step, 1, 0.5, md,
1417                       state->x, upd->xp, NULL,
1418                       bMolPBC, state->box,
1419                       state->lambda[efptBONDED], dvdlambda,
1420                       state->v, NULL, nrnb, econqCoord);
1421
1422             wallcycle_stop(wcycle, ewcCONSTR);
1423         }
1424     }
1425
1426     /* We must always unshift after updating coordinates; if we did not shake
1427        x was shifted in do_force */
1428
1429     if (!(bFirstHalf)) /* in the first half of vv, no shift. */
1430     {
1431         /* NOTE This part of the update actually does not belong with
1432          * the constraints, since we also call it without constraints.
1433          * But currently we always integrate to a temporary buffer and
1434          * then copy the results back here.
1435          */
1436         wallcycle_start_nocount(wcycle, ewcUPDATE);
1437
1438         if (md->cFREEZE != NULL && constr != NULL)
1439         {
1440             /* If we have atoms that are frozen along some, but not all
1441              * dimensions, the constraints will have moved them also along
1442              * the frozen dimensions. To freeze such degrees of freedom
1443              * we copy them back here to later copy them forward. It would
1444              * be more elegant and slightly more efficient to copies zero
1445              * times instead of twice, but the graph case below prevents this.
1446              */
1447             const ivec *nFreeze                     = inputrec->opts.nFreeze;
1448             bool        partialFreezeAndConstraints = false;
1449             for (int g = 0; g < inputrec->opts.ngfrz; g++)
1450             {
1451                 int numFreezeDim = nFreeze[g][XX] + nFreeze[g][YY] + nFreeze[g][ZZ];
1452                 if (numFreezeDim > 0 && numFreezeDim < 3)
1453                 {
1454                     partialFreezeAndConstraints = true;
1455                 }
1456             }
1457             if (partialFreezeAndConstraints)
1458             {
1459                 for (int i = start; i < nrend; i++)
1460                 {
1461                     int g = md->cFREEZE[i];
1462
1463                     for (int d = 0; d < DIM; d++)
1464                     {
1465                         if (nFreeze[g][d])
1466                         {
1467                             upd->xp[i][d] = state->x[i][d];
1468                         }
1469                     }
1470                 }
1471             }
1472         }
1473
1474         if (graph && (graph->nnodes > 0))
1475         {
1476             unshift_x(graph, state->box, state->x, upd->xp);
1477             if (TRICLINIC(state->box))
1478             {
1479                 inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
1480             }
1481             else
1482             {
1483                 inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
1484             }
1485         }
1486         else
1487         {
1488 #ifndef __clang_analyzer__
1489             // cppcheck-suppress unreadVariable
1490             nth = gmx_omp_nthreads_get(emntUpdate);
1491 #endif
1492 #pragma omp parallel for num_threads(nth) schedule(static)
1493             for (i = start; i < nrend; i++)
1494             {
1495                 // Trivial statement, does not throw
1496                 copy_rvec(upd->xp[i], state->x[i]);
1497             }
1498         }
1499         wallcycle_stop(wcycle, ewcUPDATE);
1500
1501         dump_it_all(fplog, "After unshift",
1502                     state->natoms, state->x, upd->xp, state->v, force);
1503     }
1504 /* ############# END the update of velocities and positions ######### */
1505 }
1506
1507 void update_box(FILE             *fplog,
1508                 gmx_int64_t       step,
1509                 t_inputrec       *inputrec,  /* input record and box stuff      */
1510                 t_mdatoms        *md,
1511                 t_state          *state,
1512                 rvec              force[],   /* forces on home particles */
1513                 matrix            pcoupl_mu,
1514                 t_nrnb           *nrnb,
1515                 gmx_update_t     *upd)
1516 {
1517     double               dt;
1518     int                  start, homenr, i, n, m;
1519
1520     start  = 0;
1521     homenr = md->homenr;
1522
1523     dt = inputrec->delta_t;
1524
1525     where();
1526
1527     /* now update boxes */
1528     switch (inputrec->epc)
1529     {
1530         case (epcNO):
1531             break;
1532         case (epcBERENDSEN):
1533             /* We should only scale after a step where the pressure (kinetic
1534              * energy and virial) was calculated. This happens after the
1535              * coordinate update, whereas the current routine is called before
1536              * that, so we scale when step % nstpcouple = 1 instead of 0.
1537              */
1538             if (inputrec->nstpcouple == 1 || (step % inputrec->nstpcouple == 1))
1539             {
1540                 berendsen_pscale(inputrec, pcoupl_mu, state->box, state->box_rel,
1541                                  start, homenr, state->x, md->cFREEZE, nrnb);
1542             }
1543             break;
1544         case (epcPARRINELLORAHMAN):
1545             /* The box velocities were updated in do_pr_pcoupl in the update
1546              * iteration, but we dont change the box vectors until we get here
1547              * since we need to be able to shift/unshift above.
1548              */
1549             for (i = 0; i < DIM; i++)
1550             {
1551                 for (m = 0; m <= i; m++)
1552                 {
1553                     state->box[i][m] += dt*state->boxv[i][m];
1554                 }
1555             }
1556             preserve_box_shape(inputrec, state->box_rel, state->box);
1557
1558             /* Scale the coordinates */
1559             for (n = start; (n < start+homenr); n++)
1560             {
1561                 tmvmul_ur0(pcoupl_mu, state->x[n], state->x[n]);
1562             }
1563             break;
1564         case (epcMTTK):
1565             switch (inputrec->epct)
1566             {
1567                 case (epctISOTROPIC):
1568                     /* DIM * eta = ln V.  so DIM*eta_new = DIM*eta_old + DIM*dt*veta =>
1569                        ln V_new = ln V_old + 3*dt*veta => V_new = V_old*exp(3*dt*veta) =>
1570                        Side length scales as exp(veta*dt) */
1571
1572                     msmul(state->box, exp(state->veta*dt), state->box);
1573
1574                     /* Relate veta to boxv.  veta = d(eta)/dT = (1/DIM)*1/V dV/dT.
1575                        o               If we assume isotropic scaling, and box length scaling
1576                        factor L, then V = L^DIM (det(M)).  So dV/dt = DIM
1577                        L^(DIM-1) dL/dt det(M), and veta = (1/L) dL/dt.  The
1578                        determinant of B is L^DIM det(M), and the determinant
1579                        of dB/dt is (dL/dT)^DIM det (M).  veta will be
1580                        (det(dB/dT)/det(B))^(1/3).  Then since M =
1581                        B_new*(vol_new)^(1/3), dB/dT_new = (veta_new)*B(new). */
1582
1583                     msmul(state->box, state->veta, state->boxv);
1584                     break;
1585                 default:
1586                     break;
1587             }
1588             break;
1589         default:
1590             break;
1591     }
1592
1593     if (inputrecDeform(inputrec))
1594     {
1595         deform(upd, start, homenr, state->x, state->box, inputrec, step);
1596     }
1597     where();
1598     dump_it_all(fplog, "After update",
1599                 state->natoms, state->x, upd->xp, state->v, force);
1600 }
1601
1602 void update_coords(FILE             *fplog,
1603                    gmx_int64_t       step,
1604                    t_inputrec       *inputrec,  /* input record and box stuff   */
1605                    t_mdatoms        *md,
1606                    t_state          *state,
1607                    rvec             *f,    /* forces on home particles */
1608                    t_fcdata         *fcd,
1609                    gmx_ekindata_t   *ekind,
1610                    matrix            M,
1611                    gmx_update_t     *upd,
1612                    int               UpdatePart,
1613                    t_commrec        *cr, /* these shouldn't be here -- need to think about it */
1614                    gmx_constr_t      constr)
1615 {
1616     gmx_bool          bNH, bPR, bDoConstr = FALSE;
1617     double            dt, alpha;
1618     int               start, homenr, nrend;
1619     int               nth, th;
1620
1621     bDoConstr = (NULL != constr);
1622
1623     /* Running the velocity half does nothing except for velocity verlet */
1624     if ((UpdatePart == etrtVELOCITY1 || UpdatePart == etrtVELOCITY2) &&
1625         !EI_VV(inputrec->eI))
1626     {
1627         gmx_incons("update_coords called for velocity without VV integrator");
1628     }
1629
1630     start  = 0;
1631     homenr = md->homenr;
1632     nrend  = start+homenr;
1633
1634     dt   = inputrec->delta_t;
1635
1636     /* We need to update the NMR restraint history when time averaging is used */
1637     if (state->flags & (1<<estDISRE_RM3TAV))
1638     {
1639         update_disres_history(fcd, &state->hist);
1640     }
1641     if (state->flags & (1<<estORIRE_DTAV))
1642     {
1643         update_orires_history(fcd, &state->hist);
1644     }
1645
1646
1647     bNH = inputrec->etc == etcNOSEHOOVER;
1648     bPR = ((inputrec->epc == epcPARRINELLORAHMAN) || (inputrec->epc == epcMTTK));
1649
1650     /* ############# START The update of velocities and positions ######### */
1651     where();
1652     dump_it_all(fplog, "Before update",
1653                 state->natoms, state->x, upd->xp, state->v, f);
1654
1655     nth = gmx_omp_nthreads_get(emntUpdate);
1656
1657 #pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
1658     for (th = 0; th < nth; th++)
1659     {
1660         try
1661         {
1662             int start_th, end_th;
1663
1664             start_th = start + ((nrend-start)* th   )/nth;
1665             end_th   = start + ((nrend-start)*(th+1))/nth;
1666
1667             switch (inputrec->eI)
1668             {
1669                 case (eiMD):
1670                     if (ekind->cosacc.cos_accel == 0)
1671                     {
1672                         do_update_md(start_th, end_th, dt,
1673                                      ekind->tcstat, state->nosehoover_vxi,
1674                                      ekind->bNEMD, ekind->grpstat, inputrec->opts.acc,
1675                                      inputrec->opts.nFreeze,
1676                                      md->invmass, md->ptype,
1677                                      md->cFREEZE, md->cACC, md->cTC,
1678                                      state->x, upd->xp, state->v, f, M,
1679                                      bNH, bPR);
1680                     }
1681                     else
1682                     {
1683                         do_update_visc(start_th, end_th, dt,
1684                                        ekind->tcstat, state->nosehoover_vxi,
1685                                        md->invmass, md->ptype,
1686                                        md->cTC, state->x, upd->xp, state->v, f, M,
1687                                        state->box,
1688                                        ekind->cosacc.cos_accel,
1689                                        ekind->cosacc.vcos,
1690                                        bNH, bPR);
1691                     }
1692                     break;
1693                 case (eiSD1):
1694                     /* With constraints, the SD1 update is done in 2 parts */
1695                     do_update_sd1(upd->sd,
1696                                   start_th, end_th, dt,
1697                                   inputrec->opts.acc, inputrec->opts.nFreeze,
1698                                   md->invmass, md->ptype,
1699                                   md->cFREEZE, md->cACC, md->cTC,
1700                                   state->x, upd->xp, state->v, f,
1701                                   bDoConstr, TRUE,
1702                                   step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
1703                     break;
1704                 case (eiBD):
1705                     do_update_bd(start_th, end_th, dt,
1706                                  inputrec->opts.nFreeze, md->invmass, md->ptype,
1707                                  md->cFREEZE, md->cTC,
1708                                  state->x, upd->xp, state->v, f,
1709                                  inputrec->bd_fric,
1710                                  upd->sd->bd_rf,
1711                                  step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
1712                     break;
1713                 case (eiVV):
1714                 case (eiVVAK):
1715                     alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */
1716                     switch (UpdatePart)
1717                     {
1718                         case etrtVELOCITY1:
1719                         case etrtVELOCITY2:
1720                             do_update_vv_vel(start_th, end_th, dt,
1721                                              inputrec->opts.acc, inputrec->opts.nFreeze,
1722                                              md->invmass, md->ptype,
1723                                              md->cFREEZE, md->cACC,
1724                                              state->v, f,
1725                                              (bNH || bPR), state->veta, alpha);
1726                             break;
1727                         case etrtPOSITION:
1728                             do_update_vv_pos(start_th, end_th, dt,
1729                                              inputrec->opts.nFreeze,
1730                                              md->ptype, md->cFREEZE,
1731                                              state->x, upd->xp, state->v,
1732                                              (bNH || bPR), state->veta);
1733                             break;
1734                     }
1735                     break;
1736                 default:
1737                     gmx_fatal(FARGS, "Don't know how to update coordinates");
1738                     break;
1739             }
1740         }
1741         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1742     }
1743
1744 }
1745
1746
1747 void correct_ekin(FILE *log, int start, int end, rvec v[], rvec vcm, real mass[],
1748                   real tmass, tensor ekin)
1749 {
1750     /*
1751      * This is a debugging routine. It should not be called for production code
1752      *
1753      * The kinetic energy should calculated according to:
1754      *   Ekin = 1/2 m (v-vcm)^2
1755      * However the correction is not always applied, since vcm may not be
1756      * known in time and we compute
1757      *   Ekin' = 1/2 m v^2 instead
1758      * This can be corrected afterwards by computing
1759      *   Ekin = Ekin' + 1/2 m ( -2 v vcm + vcm^2)
1760      * or in hsorthand:
1761      *   Ekin = Ekin' - m v vcm + 1/2 m vcm^2
1762      */
1763     int    i, j, k;
1764     real   m, tm;
1765     rvec   hvcm, mv;
1766     tensor dekin;
1767
1768     /* Local particles */
1769     clear_rvec(mv);
1770
1771     /* Processor dependent part. */
1772     tm = 0;
1773     for (i = start; (i < end); i++)
1774     {
1775         m      = mass[i];
1776         tm    += m;
1777         for (j = 0; (j < DIM); j++)
1778         {
1779             mv[j] += m*v[i][j];
1780         }
1781     }
1782     /* Shortcut */
1783     svmul(1/tmass, vcm, vcm);
1784     svmul(0.5, vcm, hvcm);
1785     clear_mat(dekin);
1786     for (j = 0; (j < DIM); j++)
1787     {
1788         for (k = 0; (k < DIM); k++)
1789         {
1790             dekin[j][k] += vcm[k]*(tm*hvcm[j]-mv[j]);
1791         }
1792     }
1793     pr_rvecs(log, 0, "dekin", dekin, DIM);
1794     pr_rvecs(log, 0, " ekin", ekin, DIM);
1795     fprintf(log, "dekin = %g, ekin = %g  vcm = (%8.4f %8.4f %8.4f)\n",
1796             trace(dekin), trace(ekin), vcm[XX], vcm[YY], vcm[ZZ]);
1797     fprintf(log, "mv = (%8.4f %8.4f %8.4f)\n",
1798             mv[XX], mv[YY], mv[ZZ]);
1799 }
1800
1801 extern gmx_bool update_randomize_velocities(t_inputrec *ir, gmx_int64_t step, const t_commrec *cr,
1802                                             t_mdatoms *md, t_state *state, gmx_update_t *upd, gmx_constr_t constr)
1803 {
1804
1805     real rate = (ir->delta_t)/ir->opts.tau_t[0];
1806
1807     if (ir->etc == etcANDERSEN && constr != NULL)
1808     {
1809         /* Currently, Andersen thermostat does not support constrained
1810            systems. Functionality exists in the andersen_tcoupl
1811            function in GROMACS 4.5.7 to allow this combination. That
1812            code could be ported to the current random-number
1813            generation approach, but has not yet been done because of
1814            lack of time and resources. */
1815         gmx_fatal(FARGS, "Normal Andersen is currently not supported with constraints, use massive Andersen instead");
1816     }
1817
1818     /* proceed with andersen if 1) it's fixed probability per
1819        particle andersen or 2) it's massive andersen and it's tau_t/dt */
1820     if ((ir->etc == etcANDERSEN) || do_per_step(step, (int)(1.0/rate)))
1821     {
1822         andersen_tcoupl(ir, step, cr, md, state, rate,
1823                         upd->sd->randomize_group, upd->sd->boltzfac);
1824         return TRUE;
1825     }
1826     return FALSE;
1827 }