src/gromacs/mdlib/clincs.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15  *
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 /* This file is completely threadsafe - keep it that way! */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include "main.h"
  43 #include "constr.h"
  44 #include "copyrite.h"
  45 #include "physics.h"
  46 #include "vec.h"
  47 #include "pbc.h"
  48 #include "smalloc.h"
  49 #include "mdrun.h"
  50 #include "nrnb.h"
  51 #include "domdec.h"
  52 #include "partdec.h"
  53 #include "mtop_util.h"
  54 #include "gmxfio.h"
  55 #include "gmx_omp_nthreads.h"
  56 #include "gmx_omp.h"
  57
  58 typedef struct {
  59     int b0;           /* first constraint for this thread */
  60     int b1;           /* b1-1 is the last constraint for this thread */
  61     int nind;         /* number of indices */
  62     int *ind;         /* constraint index for updating atom data */
  63     int nind_r;       /* number of indices */
  64     int *ind_r;       /* constraint index for updating atom data */
  65     int ind_nalloc;   /* allocation size of ind and ind_r */
  66     tensor vir_r_m_dr;/* temporary variable for virial calculation */
  67 } lincs_thread_t;
  68
  69 typedef struct gmx_lincsdata {
  70     int  ncg;         /* the global number of constraints */
  71     int  ncg_flex;    /* the global number of flexible constraints */
  72     int  ncg_triangle;/* the global number of constraints in triangles */
  73     int  nIter;       /* the number of iterations */
  74     int  nOrder;      /* the order of the matrix expansion */
  75     int  nc;          /* the number of constraints */
  76     int  nc_alloc;    /* the number we allocated memory for */
  77     int  ncc;         /* the number of constraint connections */
  78     int  ncc_alloc;   /* the number we allocated memory for */
  79     real matlam;      /* the FE lambda value used for filling blc and blmf */
  80     real *bllen0;     /* the reference distance in topology A */
  81     real *ddist;      /* the reference distance in top B - the r.d. in top A */
  82     int  *bla;        /* the atom pairs involved in the constraints */
  83     real *blc;        /* 1/sqrt(invmass1 + invmass2) */
  84     real *blc1;       /* as blc, but with all masses 1 */
  85     int  *blnr;       /* index into blbnb and blmf */
  86     int  *blbnb;      /* list of constraint connections */
  87     int  ntriangle;   /* the local number of constraints in triangles */
  88     int  *triangle;   /* the list of triangle constraints */
  89     int  *tri_bits;   /* the bits tell if the matrix element should be used */
  90     int  ncc_triangle;/* the number of constraint connections in triangles */
  91     real *blmf;       /* matrix of mass factors for constraint connections */
  92     real *blmf1;      /* as blmf, but with all masses 1 */
  93     real *bllen;      /* the reference bond length */
  94     int  nth;         /* The number of threads doing LINCS */
  95     lincs_thread_t *th; /* LINCS thread division */
  96     unsigned *atf;    /* atom flags for thread parallelization */
  97     int  atf_nalloc;  /* allocation size of atf */
  98     /* arrays for temporary storage in the LINCS algorithm */
  99     rvec *tmpv;
 100     real *tmpncc;
 101     real *tmp1;
 102     real *tmp2;
 103     real *tmp3;
 104     real *tmp4;
 105     real *mlambda;  /* the Lagrange multipliers * -1 */
 106     /* storage for the constraint RMS relative deviation output */
 107     real rmsd_data[3];
 108 } t_gmx_lincsdata;
 109
 110 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 111 {
 112     return lincsd->rmsd_data;
 113 }
 114
 115 real lincs_rmsd(struct gmx_lincsdata *lincsd,gmx_bool bSD2)
 116 {
 117     if (lincsd->rmsd_data[0] > 0)
 118     {
 119         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 120     }
 121     else
 122     {
 123         return 0;
 124     }
 125 }
 126
 127 /* Do a set of nrec LINCS matrix multiplications.
 128  * This function will return with up to date thread-local
 129  * constraint data, without an OpenMP barrier.
 130  */
 131 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 132                                 int b0,int b1,
 133                                 const real *blcc,
 134                                 real *rhs1,real *rhs2,real *sol)
 135 {
 136     int  nrec,rec,b,j,n,nr0,nr1;
 137     real mvb,*swap;
 138     int  ntriangle,tb,bits;
 139     const int *blnr=lincsd->blnr,*blbnb=lincsd->blbnb;
 140     const int *triangle=lincsd->triangle,*tri_bits=lincsd->tri_bits;
 141
 142     ntriangle = lincsd->ntriangle;
 143     nrec      = lincsd->nOrder;
 144
 145     for(rec=0; rec<nrec; rec++)
 146     {
 147 #pragma omp barrier
 148         for(b=b0; b<b1; b++)
 149         {
 150             mvb = 0;
 151             for(n=blnr[b]; n<blnr[b+1]; n++)
 152             {
 153                 j = blbnb[n];
 154                 mvb = mvb + blcc[n]*rhs1[j];
 155             }
 156             rhs2[b] = mvb;
 157             sol[b]  = sol[b] + mvb;
 158         }
 159         swap = rhs1;
 160         rhs1 = rhs2;
 161         rhs2 = swap;
 162     } /* nrec*(ncons+2*nrtot) flops */
 163
 164     if (ntriangle > 0)
 165     {
 166         /* Perform an extra nrec recursions for only the constraints
 167          * involved in rigid triangles.
 168          * In this way their accuracy should come close to those of the other
 169          * constraints, since traingles of constraints can produce eigenvalues
 170          * around 0.7, while the effective eigenvalue for bond constraints
 171          * is around 0.4 (and 0.7*0.7=0.5).
 172          */
 173         /* We need to copy the temporary array, since only the elements
 174          * for constraints involved in triangles are updated and then
 175          * the pointers are swapped. This saving copying the whole arrary.
 176          * We need barrier as other threads might still be reading from rhs2.
 177          */
 178 #pragma omp barrier
 179         for(b=b0; b<b1; b++)
 180         {
 181             rhs2[b] = rhs1[b];
 182         }
 183 #pragma omp barrier
 184 #pragma omp master
 185         {
 186             for(rec=0; rec<nrec; rec++)
 187             {
 188                 for(tb=0; tb<ntriangle; tb++)
 189                 {
 190                     b    = triangle[tb];
 191                     bits = tri_bits[tb];
 192                     mvb = 0;
 193                     nr0 = blnr[b];
 194                     nr1 = blnr[b+1];
 195                     for(n=nr0; n<nr1; n++)
 196                     {
 197                         if (bits & (1<<(n-nr0)))
 198                         {
 199                             j = blbnb[n];
 200                             mvb = mvb + blcc[n]*rhs1[j];
 201                         }
 202                     }
 203                     rhs2[b] = mvb;
 204                     sol[b]  = sol[b] + mvb;
 205                 }
 206                 swap = rhs1;
 207                 rhs1 = rhs2;
 208                 rhs2 = swap;
 209             }
 210         } /* flops count is missing here */
 211
 212         /* We need a barrier here as the calling routine will continue
 213          * to operate on the thread-local constraints without barrier.
 214          */
 215 #pragma omp barrier
 216     }
 217 }
 218
 219 static void lincs_update_atoms_noind(int ncons,const int *bla,
 220                                      real prefac,
 221                                      const real *fac,rvec *r,
 222                                      const real *invmass,
 223                                      rvec *x)
 224 {
 225     int  b,i,j;
 226     real mvb,im1,im2,tmp0,tmp1,tmp2;
 227
 228     for(b=0; b<ncons; b++)
 229     {
 230         i = bla[2*b];
 231         j = bla[2*b+1];
 232         mvb = prefac*fac[b];
 233         im1 = invmass[i];
 234         im2 = invmass[j];
 235         tmp0 = r[b][0]*mvb;
 236         tmp1 = r[b][1]*mvb;
 237         tmp2 = r[b][2]*mvb;
 238         x[i][0] -= tmp0*im1;
 239         x[i][1] -= tmp1*im1;
 240         x[i][2] -= tmp2*im1;
 241         x[j][0] += tmp0*im2;
 242         x[j][1] += tmp1*im2;
 243         x[j][2] += tmp2*im2;
 244     } /* 16 ncons flops */
 245 }
 246
 247 static void lincs_update_atoms_ind(int ncons,const int *ind,const int *bla,
 248                                    real prefac,
 249                                    const real *fac,rvec *r,
 250                                    const real *invmass,
 251                                    rvec *x)
 252 {
 253     int  bi,b,i,j;
 254     real mvb,im1,im2,tmp0,tmp1,tmp2;
 255
 256     for(bi=0; bi<ncons; bi++)
 257     {
 258         b = ind[bi];
 259         i = bla[2*b];
 260         j = bla[2*b+1];
 261         mvb = prefac*fac[b];
 262         im1 = invmass[i];
 263         im2 = invmass[j];
 264         tmp0 = r[b][0]*mvb;
 265         tmp1 = r[b][1]*mvb;
 266         tmp2 = r[b][2]*mvb;
 267         x[i][0] -= tmp0*im1;
 268         x[i][1] -= tmp1*im1;
 269         x[i][2] -= tmp2*im1;
 270         x[j][0] += tmp0*im2;
 271         x[j][1] += tmp1*im2;
 272         x[j][2] += tmp2*im2;
 273     } /* 16 ncons flops */
 274 }
 275
 276 static void lincs_update_atoms(struct gmx_lincsdata *li,int th,
 277                                real prefac,
 278                                const real *fac,rvec *r,
 279                                const real *invmass,
 280                                rvec *x)
 281 {
 282     if (li->nth == 1)
 283     {
 284         /* Single thread, we simply update for all constraints */
 285         lincs_update_atoms_noind(li->nc,li->bla,prefac,fac,r,invmass,x);
 286     }
 287     else
 288     {
 289         /* Update the atom vector components for our thread local
 290          * constraints that only access our local atom range.
 291          * This can be done without a barrier.
 292          */
 293         lincs_update_atoms_ind(li->th[th].nind,li->th[th].ind,
 294                                li->bla,prefac,fac,r,invmass,x);
 295
 296         if (li->th[li->nth].nind > 0)
 297         {
 298             /* Update the constraints that operate on atoms
 299              * in multiple thread atom blocks on the master thread.
 300              */
 301 #pragma omp barrier
 302 #pragma omp master
 303             {
 304                 lincs_update_atoms_ind(li->th[li->nth].nind,
 305                                        li->th[li->nth].ind,
 306                                        li->bla,prefac,fac,r,invmass,x);
 307             }
 308         }
 309     }
 310 }
 311
 312 /* LINCS projection, works on derivatives of the coordinates */
 313 static void do_lincsp(rvec *x,rvec *f,rvec *fp,t_pbc *pbc,
 314                       struct gmx_lincsdata *lincsd,int th,
 315                       real *invmass,
 316                       int econq,real *dvdlambda,
 317                       gmx_bool bCalcVir,tensor rmdf)
 318 {
 319     int     b0,b1,b,i,j,k,n;
 320     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,wfac,lam;
 321     rvec    dx;
 322     int     *bla,*blnr,*blbnb;
 323     rvec    *r;
 324     real    *blc,*blmf,*blcc,*rhs1,*rhs2,*sol;
 325
 326     b0 = lincsd->th[th].b0;
 327     b1 = lincsd->th[th].b1;
 328
 329     bla    = lincsd->bla;
 330     r      = lincsd->tmpv;
 331     blnr   = lincsd->blnr;
 332     blbnb  = lincsd->blbnb;
 333     if (econq != econqForce)
 334     {
 335         /* Use mass-weighted parameters */
 336         blc  = lincsd->blc;
 337         blmf = lincsd->blmf;
 338     }
 339     else
 340     {
 341         /* Use non mass-weighted parameters */
 342         blc  = lincsd->blc1;
 343         blmf = lincsd->blmf1;
 344     }
 345     blcc   = lincsd->tmpncc;
 346     rhs1   = lincsd->tmp1;
 347     rhs2   = lincsd->tmp2;
 348     sol    = lincsd->tmp3;
 349
 350     /* Compute normalized i-j vectors */
 351     if (pbc)
 352     {
 353         for(b=b0; b<b1; b++)
 354         {
 355             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 356             unitv(dx,r[b]);
 357         }
 358     }
 359     else
 360     {
 361         for(b=b0; b<b1; b++)
 362         {
 363             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
 364             unitv(dx,r[b]);
 365         } /* 16 ncons flops */
 366     }
 367
 368 #pragma omp barrier
 369     for(b=b0; b<b1; b++)
 370     {
 371         tmp0 = r[b][0];
 372         tmp1 = r[b][1];
 373         tmp2 = r[b][2];
 374         i = bla[2*b];
 375         j = bla[2*b+1];
 376         for(n=blnr[b]; n<blnr[b+1]; n++)
 377         {
 378             k = blbnb[n];
 379             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 380         } /* 6 nr flops */
 381         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 382                       tmp1*(f[i][1] - f[j][1]) +
 383                       tmp2*(f[i][2] - f[j][2]));
 384         rhs1[b] = mvb;
 385         sol[b]  = mvb;
 386         /* 7 flops */
 387     }
 388     /* Together: 23*ncons + 6*nrtot flops */
 389
 390     lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 391     /* nrec*(ncons+2*nrtot) flops */
 392
 393     if (econq == econqDeriv_FlexCon)
 394     {
 395         /* We only want to constraint the flexible constraints,
 396          * so we mask out the normal ones by setting sol to 0.
 397          */
 398         for(b=b0; b<b1; b++)
 399         {
 400             if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 401             {
 402                 sol[b] = 0;
 403             }
 404         }
 405     }
 406
 407     if (econq != econqForce)
 408     {
 409         lincs_update_atoms(lincsd,th,1.0,sol,r,invmass,fp);
 410     }
 411     else
 412     {
 413         for(b=b0; b<b1; b++)
 414         {
 415             i = bla[2*b];
 416             j = bla[2*b+1];
 417             mvb = blc[b]*sol[b];
 418             tmp0 = r[b][0]*mvb;
 419             tmp1 = r[b][1]*mvb;
 420             tmp2 = r[b][2]*mvb;
 421             fp[i][0] -= tmp0;
 422             fp[i][1] -= tmp1;
 423             fp[i][2] -= tmp2;
 424             fp[j][0] += tmp0;
 425             fp[j][1] += tmp1;
 426             fp[j][2] += tmp2;
 427         }
 428
 429         if (dvdlambda != NULL)
 430         {
 431 #pragma omp barrier
 432             for(b=b0; b<b1; b++)
 433             {
 434                 *dvdlambda -= blc[b]*sol[b]*lincsd->ddist[b];
 435             }
 436         }
 437         /* 10 ncons flops */
 438     }
 439
 440     if (bCalcVir)
 441     {
 442         /* Constraint virial,
 443          * determines sum r_bond x delta f,
 444          * where delta f is the constraint correction
 445          * of the quantity that is being constrained.
 446          */
 447         for(b=b0; b<b1; b++)
 448         {
 449             mvb = lincsd->bllen[b]*blc[b]*sol[b];
 450             for(i=0; i<DIM; i++)
 451             {
 452                 tmp1 = mvb*r[b][i];
 453                 for(j=0; j<DIM; j++)
 454                 {
 455                     rmdf[i][j] += tmp1*r[b][j];
 456                 }
 457             }
 458         } /* 23 ncons flops */
 459     }
 460 }
 461
 462 static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc,
 463                      struct gmx_lincsdata *lincsd,int th,
 464                      real *invmass,
 465                                          t_commrec *cr,
 466                      gmx_bool bCalcLambda,
 467                      real wangle,int *warn,
 468                      real invdt,rvec *v,
 469                      gmx_bool bCalcVir,tensor vir_r_m_dr)
 470 {
 471     int     b0,b1,b,i,j,k,n,iter;
 472     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac;
 473     rvec    dx;
 474     int     *bla,*blnr,*blbnb;
 475     rvec    *r;
 476     real    *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*blc_sol,*mlambda;
 477     int     *nlocat;
 478
 479     b0 = lincsd->th[th].b0;
 480     b1 = lincsd->th[th].b1;
 481
 482     bla    = lincsd->bla;
 483     r      = lincsd->tmpv;
 484     blnr   = lincsd->blnr;
 485     blbnb  = lincsd->blbnb;
 486     blc    = lincsd->blc;
 487     blmf   = lincsd->blmf;
 488     bllen  = lincsd->bllen;
 489     blcc   = lincsd->tmpncc;
 490     rhs1   = lincsd->tmp1;
 491     rhs2   = lincsd->tmp2;
 492     sol    = lincsd->tmp3;
 493     blc_sol= lincsd->tmp4;
 494     mlambda= lincsd->mlambda;
 495
 496     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 497     {
 498         nlocat = dd_constraints_nlocalatoms(cr->dd);
 499     }
 500     else if (PARTDECOMP(cr))
 501     {
 502         nlocat = pd_constraints_nlocalatoms(cr->pd);
 503     }
 504     else
 505     {
 506         nlocat = NULL;
 507     }
 508
 509     if (pbc)
 510     {
 511         /* Compute normalized i-j vectors */
 512         for(b=b0; b<b1; b++)
 513         {
 514             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 515             unitv(dx,r[b]);
 516         }
 517 #pragma omp barrier
 518         for(b=b0; b<b1; b++)
 519         {
 520             for(n=blnr[b]; n<blnr[b+1]; n++)
 521             {
 522                 blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]);
 523             }
 524             pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 525             mvb = blc[b]*(iprod(r[b],dx) - bllen[b]);
 526             rhs1[b] = mvb;
 527             sol[b]  = mvb;
 528         }
 529     }
 530     else
 531     {
 532         /* Compute normalized i-j vectors */
 533         for(b=b0; b<b1; b++)
 534         {
 535             i = bla[2*b];
 536             j = bla[2*b+1];
 537             tmp0 = x[i][0] - x[j][0];
 538             tmp1 = x[i][1] - x[j][1];
 539             tmp2 = x[i][2] - x[j][2];
 540             rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 541             r[b][0] = rlen*tmp0;
 542             r[b][1] = rlen*tmp1;
 543             r[b][2] = rlen*tmp2;
 544         } /* 16 ncons flops */
 545
 546 #pragma omp barrier
 547         for(b=b0; b<b1; b++)
 548         {
 549             tmp0 = r[b][0];
 550             tmp1 = r[b][1];
 551             tmp2 = r[b][2];
 552             len = bllen[b];
 553             i = bla[2*b];
 554             j = bla[2*b+1];
 555             for(n=blnr[b]; n<blnr[b+1]; n++)
 556             {
 557                 k = blbnb[n];
 558                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 559             } /* 6 nr flops */
 560             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 561                           tmp1*(xp[i][1] - xp[j][1]) +
 562                           tmp2*(xp[i][2] - xp[j][2]) - len);
 563             rhs1[b] = mvb;
 564             sol[b]  = mvb;
 565             /* 10 flops */
 566         }
 567         /* Together: 26*ncons + 6*nrtot flops */
 568     }
 569
 570     lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 571     /* nrec*(ncons+2*nrtot) flops */
 572
 573     for(b=b0; b<b1; b++)
 574     {
 575         mlambda[b] = blc[b]*sol[b];
 576     }
 577
 578     /* Update the coordinates */
 579     lincs_update_atoms(lincsd,th,1.0,mlambda,r,invmass,xp);
 580
 581     /*
 582      ********  Correction for centripetal effects  ********
 583      */
 584
 585     wfac = cos(DEG2RAD*wangle);
 586     wfac = wfac*wfac;
 587
 588     for(iter=0; iter<lincsd->nIter; iter++)
 589     {
 590         if ((DOMAINDECOMP(cr) && cr->dd->constraints) ||
 591             PARTDECOMP(cr))
 592         {
 593 #pragma omp barrier
 594 #pragma omp master
 595             {
 596                  /* Communicate the corrected non-local coordinates */
 597                 if (DOMAINDECOMP(cr))
 598                 {
 599                     dd_move_x_constraints(cr->dd,box,xp,NULL);
 600                 }
 601                 else
 602                 {
 603                     pd_move_x_constraints(cr,xp,NULL);
 604                 }
 605             }
 606         }
 607
 608 #pragma omp barrier
 609         for(b=b0; b<b1; b++)
 610         {
 611             len = bllen[b];
 612             if (pbc)
 613             {
 614                 pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 615             }
 616             else
 617             {
 618                 rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx);
 619             }
 620             len2 = len*len;
 621             dlen2 = 2*len2 - norm2(dx);
 622             if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b]))
 623             {
 624                 *warn = b;
 625             }
 626             if (dlen2 > 0)
 627             {
 628                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 629             }
 630             else
 631             {
 632                 mvb = blc[b]*len;
 633             }
 634             rhs1[b] = mvb;
 635             sol[b]  = mvb;
 636         } /* 20*ncons flops */
 637
 638         lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 639         /* nrec*(ncons+2*nrtot) flops */
 640
 641         for(b=b0; b<b1; b++)
 642         {
 643             mvb = blc[b]*sol[b];
 644             blc_sol[b]  = mvb;
 645             mlambda[b] += mvb;
 646         }
 647
 648         /* Update the coordinates */
 649         lincs_update_atoms(lincsd,th,1.0,blc_sol,r,invmass,xp);
 650     }
 651     /* nit*ncons*(37+9*nrec) flops */
 652
 653     if (v != NULL)
 654     {
 655         /* Update the velocities */
 656         lincs_update_atoms(lincsd,th,invdt,mlambda,r,invmass,v);
 657         /* 16 ncons flops */
 658     }
 659
 660     if (nlocat != NULL && bCalcLambda)
 661     {
 662         /* In lincs_update_atoms thread might cross-read mlambda */
 663 #pragma omp barrier
 664
 665         /* Only account for local atoms */
 666         for(b=b0; b<b1; b++)
 667         {
 668             mlambda[b] *= 0.5*nlocat[b];
 669         }
 670     }
 671
 672     if (bCalcVir)
 673     {
 674         /* Constraint virial */
 675         for(b=b0; b<b1; b++)
 676         {
 677             tmp0 = -bllen[b]*mlambda[b];
 678             for(i=0; i<DIM; i++)
 679             {
 680                 tmp1 = tmp0*r[b][i];
 681                 for(j=0; j<DIM; j++)
 682                 {
 683                     vir_r_m_dr[i][j] -= tmp1*r[b][j];
 684                 }
 685             }
 686         } /* 22 ncons flops */
 687     }
 688
 689     /* Total:
 690      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 691      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 692      *
 693      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 694      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 695      * if nit=1
 696      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 697      */
 698 }
 699
 700 void set_lincs_matrix(struct gmx_lincsdata *li,real *invmass,real lambda)
 701 {
 702     int i,a1,a2,n,k,sign,center;
 703     int end,nk,kk;
 704     const real invsqrt2=0.7071067811865475244;
 705
 706     for(i=0; (i<li->nc); i++)
 707     {
 708         a1 = li->bla[2*i];
 709         a2 = li->bla[2*i+1];
 710         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 711         li->blc1[i] = invsqrt2;
 712     }
 713
 714     /* Construct the coupling coefficient matrix blmf */
 715     li->ntriangle = 0;
 716     li->ncc_triangle = 0;
 717     for(i=0; (i<li->nc); i++)
 718     {
 719         a1 = li->bla[2*i];
 720         a2 = li->bla[2*i+1];
 721         for(n=li->blnr[i]; (n<li->blnr[i+1]); n++)
 722         {
 723             k = li->blbnb[n];
 724             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 725             {
 726                 sign = -1;
 727             }
 728             else
 729             {
 730                 sign = 1;
 731             }
 732             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 733             {
 734                 center = a1;
 735                 end    = a2;
 736             }
 737             else
 738             {
 739                 center = a2;
 740                 end    = a1;
 741             }
 742             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 743             li->blmf1[n] = sign*0.5;
 744             if (li->ncg_triangle > 0)
 745             {
 746                 /* Look for constraint triangles */
 747                 for(nk=li->blnr[k]; (nk<li->blnr[k+1]); nk++)
 748                 {
 749                     kk = li->blbnb[nk];
 750                     if (kk != i && kk != k &&
 751                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 752                     {
 753                         if (li->ntriangle == 0 ||
 754                             li->triangle[li->ntriangle-1] < i)
 755                         {
 756                             /* Add this constraint to the triangle list */
 757                             li->triangle[li->ntriangle] = i;
 758                             li->tri_bits[li->ntriangle] = 0;
 759                             li->ntriangle++;
 760                             if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 761                             {
 762                                 gmx_fatal(FARGS,"A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 763                                           li->blnr[i+1] - li->blnr[i],
 764                                           sizeof(li->tri_bits[0])*8-1);
 765                             }
 766                         }
 767                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 768                         li->ncc_triangle++;
 769                     }
 770                 }
 771             }
 772         }
 773     }
 774
 775     if (debug)
 776     {
 777         fprintf(debug,"Of the %d constraints %d participate in triangles\n",
 778                 li->nc,li->ntriangle);
 779         fprintf(debug,"There are %d couplings of which %d in triangles\n",
 780                 li->ncc,li->ncc_triangle);
 781     }
 782
 783     /* Set matlam,
 784      * so we know with which lambda value the masses have been set.
 785      */
 786     li->matlam = lambda;
 787 }
 788
 789 static int count_triangle_constraints(t_ilist *ilist,t_blocka *at2con)
 790 {
 791     int  ncon1,ncon_tot;
 792     int  c0,a00,a01,n1,c1,a10,a11,ac1,n2,c2,a20,a21;
 793     int  ncon_triangle;
 794     gmx_bool bTriangle;
 795     t_iatom *ia1,*ia2,*iap;
 796
 797     ncon1    = ilist[F_CONSTR].nr/3;
 798     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 799
 800     ia1 = ilist[F_CONSTR].iatoms;
 801     ia2 = ilist[F_CONSTRNC].iatoms;
 802
 803     ncon_triangle = 0;
 804     for(c0=0; c0<ncon_tot; c0++)
 805     {
 806         bTriangle = FALSE;
 807         iap = constr_iatomptr(ncon1,ia1,ia2,c0);
 808         a00 = iap[1];
 809         a01 = iap[2];
 810         for(n1=at2con->index[a01]; n1<at2con->index[a01+1]; n1++)
 811         {
 812             c1 = at2con->a[n1];
 813             if (c1 != c0)
 814             {
 815                 iap = constr_iatomptr(ncon1,ia1,ia2,c1);
 816                 a10 = iap[1];
 817                 a11 = iap[2];
 818                 if (a10 == a01)
 819                 {
 820                     ac1 = a11;
 821                 }
 822                 else
 823                 {
 824                     ac1 = a10;
 825                 }
 826                 for(n2=at2con->index[ac1]; n2<at2con->index[ac1+1]; n2++)
 827                 {
 828                     c2 = at2con->a[n2];
 829                     if (c2 != c0 && c2 != c1)
 830                     {
 831                         iap = constr_iatomptr(ncon1,ia1,ia2,c2);
 832                         a20 = iap[1];
 833                         a21 = iap[2];
 834                         if (a20 == a00 || a21 == a00)
 835                         {
 836                             bTriangle = TRUE;
 837                         }
 838                     }
 839                 }
 840             }
 841         }
 842         if (bTriangle)
 843         {
 844             ncon_triangle++;
 845         }
 846     }
 847
 848     return ncon_triangle;
 849 }
 850
 851 static int int_comp(const void *a,const void *b)
 852 {
 853     return (*(int *)a) - (*(int *)b);
 854 }
 855
 856 gmx_lincsdata_t init_lincs(FILE *fplog,gmx_mtop_t *mtop,
 857                            int nflexcon_global,t_blocka *at2con,
 858                            gmx_bool bPLINCS,int nIter,int nProjOrder)
 859 {
 860     struct gmx_lincsdata *li;
 861     int mb;
 862     gmx_moltype_t *molt;
 863
 864     if (fplog)
 865     {
 866         fprintf(fplog,"\nInitializing%s LINear Constraint Solver\n",
 867                 bPLINCS ? " Parallel" : "");
 868     }
 869
 870     snew(li,1);
 871
 872     li->ncg      =
 873         gmx_mtop_ftype_count(mtop,F_CONSTR) +
 874         gmx_mtop_ftype_count(mtop,F_CONSTRNC);
 875     li->ncg_flex = nflexcon_global;
 876
 877     li->ncg_triangle = 0;
 878     for(mb=0; mb<mtop->nmolblock; mb++)
 879     {
 880         molt = &mtop->moltype[mtop->molblock[mb].type];
 881         li->ncg_triangle +=
 882             mtop->molblock[mb].nmol*
 883             count_triangle_constraints(molt->ilist,
 884                                        &at2con[mtop->molblock[mb].type]);
 885     }
 886
 887     li->nIter  = nIter;
 888     li->nOrder = nProjOrder;
 889
 890     /* LINCS can run on any number of threads.
 891      * Currently the number is fixed for the whole simulation,
 892      * but it could be set in set_lincs().
 893      */
 894     li->nth = gmx_omp_nthreads_get(emntLINCS);
 895     if (li->nth == 1)
 896     {
 897         snew(li->th,1);
 898     }
 899     else
 900     {
 901         /* Allocate an extra elements for "thread-overlap" constraints */
 902         snew(li->th,li->nth+1);
 903     }
 904     if (debug)
 905     {
 906         fprintf(debug,"LINCS: using %d threads\n",li->nth);
 907     }
 908
 909     if (bPLINCS || li->ncg_triangle > 0)
 910     {
 911         please_cite(fplog,"Hess2008a");
 912     }
 913     else
 914     {
 915         please_cite(fplog,"Hess97a");
 916     }
 917
 918     if (fplog)
 919     {
 920         fprintf(fplog,"The number of constraints is %d\n",li->ncg);
 921         if (bPLINCS)
 922         {
 923             fprintf(fplog,"There are inter charge-group constraints,\n"
 924                     "will communicate selected coordinates each lincs iteration\n");
 925         }
 926         if (li->ncg_triangle > 0)
 927         {
 928             fprintf(fplog,
 929                     "%d constraints are involved in constraint triangles,\n"
 930                     "will apply an additional matrix expansion of order %d for couplings\n"
 931                     "between constraints inside triangles\n",
 932                     li->ncg_triangle,li->nOrder);
 933         }
 934     }
 935
 936     return li;
 937 }
 938
 939 /* Sets up the work division over the threads */
 940 static void lincs_thread_setup(struct gmx_lincsdata *li,int natoms)
 941 {
 942     lincs_thread_t *li_m;
 943     int th;
 944     unsigned *atf;
 945     int a;
 946
 947     if (natoms > li->atf_nalloc)
 948     {
 949         li->atf_nalloc = over_alloc_large(natoms);
 950         srenew(li->atf,li->atf_nalloc);
 951     }
 952
 953     atf = li->atf;
 954     /* Clear the atom flags */
 955     for(a=0; a<natoms; a++)
 956     {
 957         atf[a] = 0;
 958     }
 959
 960     for(th=0; th<li->nth; th++)
 961     {
 962         lincs_thread_t *li_th;
 963         int b;
 964
 965         li_th = &li->th[th];
 966
 967         /* The constraints are divided equally over the threads */
 968         li_th->b0 = (li->nc* th   )/li->nth;
 969         li_th->b1 = (li->nc*(th+1))/li->nth;
 970
 971         if (th < sizeof(*atf)*8)
 972         {
 973             /* For each atom set a flag for constraints from each */
 974             for(b=li_th->b0; b<li_th->b1; b++)
 975             {
 976                 atf[li->bla[b*2]  ] |= (1U<<th);
 977                 atf[li->bla[b*2+1]] |= (1U<<th);
 978             }
 979         }
 980     }
 981
 982 #pragma omp parallel for num_threads(li->nth) schedule(static)
 983     for(th=0; th<li->nth; th++)
 984     {
 985         lincs_thread_t *li_th;
 986         unsigned mask;
 987         int b;
 988
 989         li_th = &li->th[th];
 990
 991         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
 992         {
 993             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
 994             srenew(li_th->ind,li_th->ind_nalloc);
 995             srenew(li_th->ind_r,li_th->ind_nalloc);
 996         }
 997
 998         if (th < sizeof(*atf)*8)
 999         {
1000             mask = (1U<<th) - 1U;
1001
1002             li_th->nind   = 0;
1003             li_th->nind_r = 0;
1004             for(b=li_th->b0; b<li_th->b1; b++)
1005             {
1006                 /* We let the constraint with the lowest thread index
1007                  * operate on atoms with constraints from multiple threads.
1008                  */
1009                 if (((atf[li->bla[b*2]]   & mask) == 0) &&
1010                     ((atf[li->bla[b*2+1]] & mask) == 0))
1011                 {
1012                     /* Add the constraint to the local atom update index */
1013                     li_th->ind[li_th->nind++] = b;
1014                 }
1015                 else
1016                 {
1017                     /* Add the constraint to the rest block */
1018                     li_th->ind_r[li_th->nind_r++] = b;
1019                 }
1020             }
1021         }
1022         else
1023         {
1024             /* We are out of bits, assign all constraints to rest */
1025             for(b=li_th->b0; b<li_th->b1; b++)
1026             {
1027                 li_th->ind_r[li_th->nind_r++] = b;
1028             }
1029         }
1030     }
1031
1032     /* We need to copy all constraints which have not be assigned
1033      * to a thread to a separate list which will be handled by one thread.
1034      */
1035     li_m = &li->th[li->nth];
1036
1037     li_m->nind = 0;
1038     for(th=0; th<li->nth; th++)
1039     {
1040         lincs_thread_t *li_th;
1041         int b;
1042
1043         li_th   = &li->th[th];
1044
1045         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1046         {
1047             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1048             srenew(li_m->ind,li_m->ind_nalloc);
1049         }
1050
1051         for(b=0; b<li_th->nind_r; b++)
1052         {
1053             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1054         }
1055
1056         if (debug)
1057         {
1058             fprintf(debug,"LINCS thread %d: %d constraints\n",
1059                     th,li_th->nind);
1060         }
1061     }
1062
1063     if (debug)
1064     {
1065         fprintf(debug,"LINCS thread r: %d constraints\n",
1066                 li_m->nind);
1067     }
1068 }
1069
1070
1071 void set_lincs(t_idef *idef,t_mdatoms *md,
1072                gmx_bool bDynamics,t_commrec *cr,
1073                struct gmx_lincsdata *li)
1074 {
1075     int      start,natoms,nflexcon;
1076     t_blocka at2con;
1077     t_iatom  *iatom;
1078     int      i,k,ncc_alloc,ni,con,nconnect,concon;
1079     int      type,a1,a2;
1080     real     lenA=0,lenB;
1081     gmx_bool     bLocal;
1082
1083     li->nc = 0;
1084     li->ncc = 0;
1085     /* Zero the thread index ranges.
1086      * Otherwise without local constraints we could return with old ranges.
1087      */
1088     for(i=0; i<li->nth; i++)
1089     {
1090         li->th[i].b0   = 0;
1091         li->th[i].b1   = 0;
1092         li->th[i].nind = 0;
1093     }
1094     if (li->nth > 1)
1095     {
1096         li->th[li->nth].nind = 0;
1097     }
1098
1099     /* This is the local topology, so there are only F_CONSTR constraints */
1100     if (idef->il[F_CONSTR].nr == 0)
1101     {
1102         /* There are no constraints,
1103          * we do not need to fill any data structures.
1104          */
1105         return;
1106     }
1107
1108     if (debug)
1109     {
1110         fprintf(debug,"Building the LINCS connectivity\n");
1111     }
1112
1113     if (DOMAINDECOMP(cr))
1114     {
1115         if (cr->dd->constraints)
1116         {
1117             dd_get_constraint_range(cr->dd,&start,&natoms);
1118         }
1119         else
1120         {
1121             natoms = cr->dd->nat_home;
1122         }
1123         start = 0;
1124     }
1125     else if(PARTDECOMP(cr))
1126         {
1127                 pd_get_constraint_range(cr->pd,&start,&natoms);
1128         }
1129         else
1130     {
1131         start  = md->start;
1132         natoms = md->homenr;
1133     }
1134     at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics,
1135                          &nflexcon);
1136
1137
1138     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1139     {
1140         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1141         srenew(li->bllen0,li->nc_alloc);
1142         srenew(li->ddist,li->nc_alloc);
1143         srenew(li->bla,2*li->nc_alloc);
1144         srenew(li->blc,li->nc_alloc);
1145         srenew(li->blc1,li->nc_alloc);
1146         srenew(li->blnr,li->nc_alloc+1);
1147         srenew(li->bllen,li->nc_alloc);
1148         srenew(li->tmpv,li->nc_alloc);
1149         srenew(li->tmp1,li->nc_alloc);
1150         srenew(li->tmp2,li->nc_alloc);
1151         srenew(li->tmp3,li->nc_alloc);
1152         srenew(li->tmp4,li->nc_alloc);
1153         srenew(li->mlambda,li->nc_alloc);
1154         if (li->ncg_triangle > 0)
1155         {
1156             /* This is allocating too much, but it is difficult to improve */
1157             srenew(li->triangle,li->nc_alloc);
1158             srenew(li->tri_bits,li->nc_alloc);
1159         }
1160     }
1161
1162     iatom = idef->il[F_CONSTR].iatoms;
1163
1164     ncc_alloc = li->ncc_alloc;
1165     li->blnr[0] = 0;
1166
1167     ni = idef->il[F_CONSTR].nr/3;
1168
1169     con = 0;
1170     nconnect = 0;
1171     li->blnr[con] = nconnect;
1172     for(i=0; i<ni; i++)
1173     {
1174         bLocal = TRUE;
1175         type = iatom[3*i];
1176         a1   = iatom[3*i+1];
1177         a2   = iatom[3*i+2];
1178         lenA = idef->iparams[type].constr.dA;
1179         lenB = idef->iparams[type].constr.dB;
1180         /* Skip the flexible constraints when not doing dynamics */
1181         if (bDynamics || lenA!=0 || lenB!=0)
1182         {
1183             li->bllen0[con]  = lenA;
1184             li->ddist[con]   = lenB - lenA;
1185             /* Set the length to the topology A length */
1186             li->bllen[con]   = li->bllen0[con];
1187             li->bla[2*con]   = a1;
1188             li->bla[2*con+1] = a2;
1189             /* Construct the constraint connection matrix blbnb */
1190             for(k=at2con.index[a1-start]; k<at2con.index[a1-start+1]; k++)
1191             {
1192                 concon = at2con.a[k];
1193                 if (concon != i)
1194                 {
1195                     if (nconnect >= ncc_alloc)
1196                     {
1197                         ncc_alloc = over_alloc_small(nconnect+1);
1198                         srenew(li->blbnb,ncc_alloc);
1199                     }
1200                     li->blbnb[nconnect++] = concon;
1201                 }
1202             }
1203             for(k=at2con.index[a2-start]; k<at2con.index[a2-start+1]; k++)
1204             {
1205                 concon = at2con.a[k];
1206                 if (concon != i)
1207                 {
1208                     if (nconnect+1 > ncc_alloc)
1209                     {
1210                         ncc_alloc = over_alloc_small(nconnect+1);
1211                         srenew(li->blbnb,ncc_alloc);
1212                     }
1213                     li->blbnb[nconnect++] = concon;
1214                 }
1215             }
1216             li->blnr[con+1] = nconnect;
1217
1218             if (cr->dd == NULL)
1219             {
1220                 /* Order the blbnb matrix to optimize memory access */
1221                 qsort(&(li->blbnb[li->blnr[con]]),li->blnr[con+1]-li->blnr[con],
1222                       sizeof(li->blbnb[0]),int_comp);
1223             }
1224             /* Increase the constraint count */
1225             con++;
1226         }
1227     }
1228
1229     done_blocka(&at2con);
1230
1231     /* This is the real number of constraints,
1232      * without dynamics the flexible constraints are not present.
1233      */
1234     li->nc = con;
1235
1236     li->ncc = li->blnr[con];
1237     if (cr->dd == NULL)
1238     {
1239         /* Since the matrix is static, we can free some memory */
1240         ncc_alloc = li->ncc;
1241         srenew(li->blbnb,ncc_alloc);
1242     }
1243
1244     if (ncc_alloc > li->ncc_alloc)
1245     {
1246         li->ncc_alloc = ncc_alloc;
1247         srenew(li->blmf,li->ncc_alloc);
1248         srenew(li->blmf1,li->ncc_alloc);
1249         srenew(li->tmpncc,li->ncc_alloc);
1250     }
1251
1252     if (debug)
1253     {
1254         fprintf(debug,"Number of constraints is %d, couplings %d\n",
1255                 li->nc,li->ncc);
1256     }
1257
1258     if (li->nth == 1)
1259     {
1260         li->th[0].b0 = 0;
1261         li->th[0].b1 = li->nc;
1262     }
1263     else
1264     {
1265         lincs_thread_setup(li,md->nr);
1266     }
1267
1268     set_lincs_matrix(li,md->invmass,md->lambda);
1269 }
1270
1271 static void lincs_warning(FILE *fplog,
1272                           gmx_domdec_t *dd,rvec *x,rvec *xprime,t_pbc *pbc,
1273                           int ncons,int *bla,real *bllen,real wangle,
1274                           int maxwarn,int *warncount)
1275 {
1276     int b,i,j;
1277     rvec v0,v1;
1278     real wfac,d0,d1,cosine;
1279     char buf[STRLEN];
1280
1281     wfac=cos(DEG2RAD*wangle);
1282
1283     sprintf(buf,"bonds that rotated more than %g degrees:\n"
1284             " atom 1 atom 2  angle  previous, current, constraint length\n",
1285             wangle);
1286     fprintf(stderr,"%s",buf);
1287     if (fplog)
1288     {
1289         fprintf(fplog,"%s",buf);
1290     }
1291
1292     for(b=0;b<ncons;b++)
1293     {
1294         i = bla[2*b];
1295         j = bla[2*b+1];
1296         if (pbc)
1297         {
1298             pbc_dx_aiuc(pbc,x[i],x[j],v0);
1299             pbc_dx_aiuc(pbc,xprime[i],xprime[j],v1);
1300         }
1301         else
1302         {
1303             rvec_sub(x[i],x[j],v0);
1304             rvec_sub(xprime[i],xprime[j],v1);
1305         }
1306         d0 = norm(v0);
1307         d1 = norm(v1);
1308         cosine = iprod(v0,v1)/(d0*d1);
1309         if (cosine < wfac)
1310         {
1311             sprintf(buf," %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1312                     ddglatnr(dd,i),ddglatnr(dd,j),
1313                     RAD2DEG*acos(cosine),d0,d1,bllen[b]);
1314             fprintf(stderr,"%s",buf);
1315             if (fplog)
1316             {
1317                 fprintf(fplog,"%s",buf);
1318             }
1319             if (!gmx_isfinite(d1))
1320             {
1321                 gmx_fatal(FARGS,"Bond length not finite.");
1322             }
1323
1324             (*warncount)++;
1325         }
1326     }
1327     if (*warncount > maxwarn)
1328     {
1329         too_many_constraint_warnings(econtLINCS,*warncount);
1330     }
1331 }
1332
1333 static void cconerr(gmx_domdec_t *dd,
1334                     int ncons,int *bla,real *bllen,rvec *x,t_pbc *pbc,
1335                     real *ncons_loc,real *ssd,real *max,int *imax)
1336 {
1337     real      len,d,ma,ssd2,r2;
1338     int       *nlocat,count,b,im;
1339     rvec      dx;
1340
1341     if (dd && dd->constraints)
1342     {
1343         nlocat = dd_constraints_nlocalatoms(dd);
1344     }
1345     else
1346     {
1347         nlocat = 0;
1348     }
1349
1350     ma = 0;
1351     ssd2 = 0;
1352     im = 0;
1353     count = 0;
1354     for(b=0;b<ncons;b++)
1355     {
1356         if (pbc)
1357         {
1358             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
1359         }
1360         else {
1361             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
1362         }
1363         r2 = norm2(dx);
1364         len = r2*gmx_invsqrt(r2);
1365         d = fabs(len/bllen[b]-1);
1366         if (d > ma && (nlocat==NULL || nlocat[b]))
1367         {
1368             ma = d;
1369             im = b;
1370         }
1371         if (nlocat == NULL)
1372         {
1373             ssd2 += d*d;
1374             count++;
1375         }
1376         else
1377         {
1378             ssd2 += nlocat[b]*d*d;
1379             count += nlocat[b];
1380         }
1381     }
1382
1383     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1384     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1385     *max = ma;
1386     *imax = im;
1387 }
1388
1389 static void dump_conf(gmx_domdec_t *dd,struct gmx_lincsdata *li,
1390                       t_blocka *at2con,
1391                       char *name,gmx_bool bAll,rvec *x,matrix box)
1392 {
1393     char str[STRLEN];
1394     FILE *fp;
1395     int  ac0,ac1,i;
1396
1397     dd_get_constraint_range(dd,&ac0,&ac1);
1398
1399     sprintf(str,"%s_%d_%d_%d.pdb",name,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
1400     fp = gmx_fio_fopen(str,"w");
1401     fprintf(fp,"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1\n",
1402             10*norm(box[XX]),10*norm(box[YY]),10*norm(box[ZZ]),
1403             90.0,90.0,90.0);
1404     for(i=0; i<ac1; i++)
1405     {
1406         if (i < dd->nat_home || (bAll && i >= ac0 && i < ac1))
1407         {
1408             fprintf(fp,"%-6s%5u  %-4.4s%3.3s %c%4d    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
1409                     "ATOM",ddglatnr(dd,i),"C","ALA",' ',i+1,
1410                     10*x[i][XX],10*x[i][YY],10*x[i][ZZ],
1411                     1.0,i<dd->nat_tot ? 0.0 : 1.0);
1412         }
1413     }
1414     if (bAll)
1415     {
1416         for(i=0; i<li->nc; i++)
1417         {
1418             fprintf(fp,"CONECT%5d%5d\n",
1419                     ddglatnr(dd,li->bla[2*i]),
1420                     ddglatnr(dd,li->bla[2*i+1]));
1421         }
1422     }
1423     gmx_fio_fclose(fp);
1424 }
1425
1426 gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
1427                          t_inputrec *ir,
1428                          gmx_large_int_t step,
1429                          struct gmx_lincsdata *lincsd,t_mdatoms *md,
1430                          t_commrec *cr,
1431                          rvec *x,rvec *xprime,rvec *min_proj,
1432                          matrix box,t_pbc *pbc,
1433                          real lambda,real *dvdlambda,
1434                          real invdt,rvec *v,
1435                          gmx_bool bCalcVir,tensor vir_r_m_dr,
1436                          int econq,
1437                          t_nrnb *nrnb,
1438                          int maxwarn,int *warncount)
1439 {
1440     char  buf[STRLEN],buf2[22],buf3[STRLEN];
1441     int   i,warn,p_imax,error;
1442     real  ncons_loc,p_ssd,p_max=0;
1443     rvec  dx;
1444     gmx_bool  bOK;
1445
1446     bOK = TRUE;
1447
1448     if (lincsd->nc == 0 && cr->dd == NULL)
1449     {
1450         if (bLog || bEner)
1451         {
1452             lincsd->rmsd_data[0] = 0;
1453             if (ir->eI == eiSD2 && v == NULL)
1454             {
1455                 i = 2;
1456             }
1457             else
1458             {
1459                 i = 1;
1460             }
1461             lincsd->rmsd_data[i] = 0;
1462         }
1463
1464         return bOK;
1465     }
1466
1467     if (econq == econqCoord)
1468     {
1469         if (ir->efep != efepNO)
1470         {
1471             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1472             {
1473                 set_lincs_matrix(lincsd,md->invmass,md->lambda);
1474             }
1475
1476             for(i=0; i<lincsd->nc; i++)
1477             {
1478                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1479             }
1480         }
1481
1482         if (lincsd->ncg_flex)
1483         {
1484             /* Set the flexible constraint lengths to the old lengths */
1485             if (pbc != NULL)
1486             {
1487                 for(i=0; i<lincsd->nc; i++)
1488                 {
1489                     if (lincsd->bllen[i] == 0) {
1490                         pbc_dx_aiuc(pbc,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx);
1491                         lincsd->bllen[i] = norm(dx);
1492                     }
1493                 }
1494             }
1495             else
1496             {
1497                 for(i=0; i<lincsd->nc; i++)
1498                 {
1499                     if (lincsd->bllen[i] == 0)
1500                     {
1501                         lincsd->bllen[i] =
1502                             sqrt(distance2(x[lincsd->bla[2*i]],
1503                                            x[lincsd->bla[2*i+1]]));
1504                     }
1505                 }
1506             }
1507         }
1508
1509         if (bLog && fplog)
1510         {
1511             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1512                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1513         }
1514
1515         /* This warn var can be updated by multiple threads
1516          * at the same time. But as we only need to detect
1517          * if a warning occured or not, this is not an issue.
1518          */
1519         warn = -1;
1520
1521         /* The OpenMP parallel region of constrain_lincs for coords */
1522 #pragma omp parallel num_threads(lincsd->nth)
1523         {
1524             int th=gmx_omp_get_thread_num();
1525
1526             clear_mat(lincsd->th[th].vir_r_m_dr);
1527
1528             do_lincs(x,xprime,box,pbc,lincsd,th,
1529                      md->invmass,cr,
1530                      bCalcVir || (ir->efep != efepNO),
1531                      ir->LincsWarnAngle,&warn,
1532                      invdt,v,bCalcVir,
1533                      th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1534         }
1535
1536         if (ir->efep != efepNO)
1537         {
1538             real dt_2,dvdl=0;
1539
1540             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
1541             for(i=0; (i<lincsd->nc); i++)
1542             {
1543                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
1544             }
1545             *dvdlambda += dvdl;
1546                 }
1547
1548         if (bLog && fplog && lincsd->nc > 0)
1549         {
1550             fprintf(fplog,"   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1551             fprintf(fplog,"       Before LINCS          %.6f    %.6f %6d %6d\n",
1552                     sqrt(p_ssd/ncons_loc),p_max,
1553                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1554                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1555         }
1556         if (bLog || bEner)
1557         {
1558             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1559                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1560             /* Check if we are doing the second part of SD */
1561             if (ir->eI == eiSD2 && v == NULL)
1562             {
1563                 i = 2;
1564             }
1565             else
1566             {
1567                 i = 1;
1568             }
1569             lincsd->rmsd_data[0] = ncons_loc;
1570             lincsd->rmsd_data[i] = p_ssd;
1571         }
1572         else
1573         {
1574             lincsd->rmsd_data[0] = 0;
1575             lincsd->rmsd_data[1] = 0;
1576             lincsd->rmsd_data[2] = 0;
1577         }
1578         if (bLog && fplog && lincsd->nc > 0)
1579         {
1580             fprintf(fplog,
1581                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1582                     sqrt(p_ssd/ncons_loc),p_max,
1583                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1584                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1585         }
1586
1587         if (warn >= 0)
1588         {
1589             if (maxwarn >= 0)
1590             {
1591                 cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1592                         &ncons_loc,&p_ssd,&p_max,&p_imax);
1593                 if (MULTISIM(cr))
1594                 {
1595                     sprintf(buf3," in simulation %d", cr->ms->sim);
1596                 }
1597                 else
1598                 {
1599                     buf3[0] = 0;
1600                 }
1601                 sprintf(buf,"\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1602                         "relative constraint deviation after LINCS:\n"
1603                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1604                         gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t,
1605                         buf3,
1606                         sqrt(p_ssd/ncons_loc),p_max,
1607                         ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1608                         ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1609                 if (fplog)
1610                 {
1611                     fprintf(fplog,"%s",buf);
1612                 }
1613                 fprintf(stderr,"%s",buf);
1614                 lincs_warning(fplog,cr->dd,x,xprime,pbc,
1615                               lincsd->nc,lincsd->bla,lincsd->bllen,
1616                               ir->LincsWarnAngle,maxwarn,warncount);
1617             }
1618             bOK = (p_max < 0.5);
1619         }
1620
1621         if (lincsd->ncg_flex) {
1622             for(i=0; (i<lincsd->nc); i++)
1623                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1624                     lincsd->bllen[i] = 0;
1625         }
1626     }
1627     else
1628     {
1629         /* The OpenMP parallel region of constrain_lincs for derivatives */
1630 #pragma omp parallel num_threads(lincsd->nth)
1631         {
1632             int th=gmx_omp_get_thread_num();
1633
1634             do_lincsp(x,xprime,min_proj,pbc,lincsd,th,
1635                       md->invmass,econq,ir->efep != efepNO ? dvdlambda : NULL,
1636                       bCalcVir,th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1637         }
1638     }
1639
1640     if (bCalcVir && lincsd->nth > 1)
1641     {
1642         for(i=1; i<lincsd->nth; i++)
1643         {
1644             m_add(vir_r_m_dr,lincsd->th[i].vir_r_m_dr,vir_r_m_dr);
1645         }
1646     }
1647
1648     /* count assuming nit=1 */
1649     inc_nrnb(nrnb,eNR_LINCS,lincsd->nc);
1650     inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc);
1651     if (lincsd->ntriangle > 0)
1652     {
1653         inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle);
1654     }
1655     if (v)
1656     {
1657         inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2);
1658     }
1659     if (bCalcVir)
1660     {
1661         inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc);
1662     }
1663
1664     return bOK;
1665 }