src/gromacs/mdlib/clincs.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15  *
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 /* This file is completely threadsafe - keep it that way! */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include "main.h"
  43 #include "constr.h"
  44 #include "copyrite.h"
  45 #include "physics.h"
  46 #include "vec.h"
  47 #include "pbc.h"
  48 #include "smalloc.h"
  49 #include "mdrun.h"
  50 #include "nrnb.h"
  51 #include "domdec.h"
  52 #include "partdec.h"
  53 #include "mtop_util.h"
  54 #include "gmxfio.h"
  55 #include "gmx_omp_nthreads.h"
  56 #include "gmx_omp.h"
  57
  58 typedef struct {
  59     int b0;           /* first constraint for this thread */
  60     int b1;           /* b1-1 is the last constraint for this thread */
  61     int nind;         /* number of indices */
  62     int *ind;         /* constraint index for updating atom data */
  63     int nind_r;       /* number of indices */
  64     int *ind_r;       /* constraint index for updating atom data */
  65     int ind_nalloc;   /* allocation size of ind and ind_r */
  66     tensor vir_r_m_dr;/* temporary variable for virial calculation */
  67 } lincs_thread_t;
  68
  69 typedef struct gmx_lincsdata {
  70     int  ncg;         /* the global number of constraints */
  71     int  ncg_flex;    /* the global number of flexible constraints */
  72     int  ncg_triangle;/* the global number of constraints in triangles */
  73     int  nIter;       /* the number of iterations */
  74     int  nOrder;      /* the order of the matrix expansion */
  75     int  nc;          /* the number of constraints */
  76     int  nc_alloc;    /* the number we allocated memory for */
  77     int  ncc;         /* the number of constraint connections */
  78     int  ncc_alloc;   /* the number we allocated memory for */
  79     real matlam;      /* the FE lambda value used for filling blc and blmf */
  80     real *bllen0;     /* the reference distance in topology A */
  81     real *ddist;      /* the reference distance in top B - the r.d. in top A */
  82     int  *bla;        /* the atom pairs involved in the constraints */
  83     real *blc;        /* 1/sqrt(invmass1 + invmass2) */
  84     real *blc1;       /* as blc, but with all masses 1 */
  85     int  *blnr;       /* index into blbnb and blmf */
  86     int  *blbnb;      /* list of constraint connections */
  87     int  ntriangle;   /* the local number of constraints in triangles */
  88     int  *triangle;   /* the list of triangle constraints */
  89     int  *tri_bits;   /* the bits tell if the matrix element should be used */
  90     int  ncc_triangle;/* the number of constraint connections in triangles */
  91     gmx_bool bCommIter; /* communicate before each LINCS interation */
  92     real *blmf;       /* matrix of mass factors for constraint connections */
  93     real *blmf1;      /* as blmf, but with all masses 1 */
  94     real *bllen;      /* the reference bond length */
  95     int  nth;         /* The number of threads doing LINCS */
  96     lincs_thread_t *th; /* LINCS thread division */
  97     unsigned *atf;    /* atom flags for thread parallelization */
  98     int  atf_nalloc;  /* allocation size of atf */
  99     /* arrays for temporary storage in the LINCS algorithm */
 100     rvec *tmpv;
 101     real *tmpncc;
 102     real *tmp1;
 103     real *tmp2;
 104     real *tmp3;
 105     real *tmp4;
 106     real *mlambda;  /* the Lagrange multipliers * -1 */
 107     /* storage for the constraint RMS relative deviation output */
 108     real rmsd_data[3];
 109 } t_gmx_lincsdata;
 110
 111 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 112 {
 113     return lincsd->rmsd_data;
 114 }
 115
 116 real lincs_rmsd(struct gmx_lincsdata *lincsd,gmx_bool bSD2)
 117 {
 118     if (lincsd->rmsd_data[0] > 0)
 119     {
 120         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 121     }
 122     else
 123     {
 124         return 0;
 125     }
 126 }
 127
 128 /* Do a set of nrec LINCS matrix multiplications.
 129  * This function will return with up to date thread-local
 130  * constraint data, without an OpenMP barrier.
 131  */
 132 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 133                                 int b0,int b1,
 134                                 const real *blcc,
 135                                 real *rhs1,real *rhs2,real *sol)
 136 {
 137     int  nrec,rec,b,j,n,nr0,nr1;
 138     real mvb,*swap;
 139     int  ntriangle,tb,bits;
 140     const int *blnr=lincsd->blnr,*blbnb=lincsd->blbnb;
 141     const int *triangle=lincsd->triangle,*tri_bits=lincsd->tri_bits;
 142
 143     ntriangle = lincsd->ntriangle;
 144     nrec      = lincsd->nOrder;
 145
 146     for(rec=0; rec<nrec; rec++)
 147     {
 148 #pragma omp barrier
 149         for(b=b0; b<b1; b++)
 150         {
 151             mvb = 0;
 152             for(n=blnr[b]; n<blnr[b+1]; n++)
 153             {
 154                 j = blbnb[n];
 155                 mvb = mvb + blcc[n]*rhs1[j];
 156             }
 157             rhs2[b] = mvb;
 158             sol[b]  = sol[b] + mvb;
 159         }
 160         swap = rhs1;
 161         rhs1 = rhs2;
 162         rhs2 = swap;
 163     } /* nrec*(ncons+2*nrtot) flops */
 164
 165     if (ntriangle > 0)
 166     {
 167         /* Perform an extra nrec recursions for only the constraints
 168          * involved in rigid triangles.
 169          * In this way their accuracy should come close to those of the other
 170          * constraints, since traingles of constraints can produce eigenvalues
 171          * around 0.7, while the effective eigenvalue for bond constraints
 172          * is around 0.4 (and 0.7*0.7=0.5).
 173          */
 174         /* We need to copy the temporary array, since only the elements
 175          * for constraints involved in triangles are updated and then
 176          * the pointers are swapped. This saving copying the whole arrary.
 177          * We need barrier as other threads might still be reading from rhs2.
 178          */
 179 #pragma omp barrier
 180         for(b=b0; b<b1; b++)
 181         {
 182             rhs2[b] = rhs1[b];
 183         }
 184 #pragma omp barrier
 185 #pragma omp master
 186         {
 187             for(rec=0; rec<nrec; rec++)
 188             {
 189                 for(tb=0; tb<ntriangle; tb++)
 190                 {
 191                     b    = triangle[tb];
 192                     bits = tri_bits[tb];
 193                     mvb = 0;
 194                     nr0 = blnr[b];
 195                     nr1 = blnr[b+1];
 196                     for(n=nr0; n<nr1; n++)
 197                     {
 198                         if (bits & (1<<(n-nr0)))
 199                         {
 200                             j = blbnb[n];
 201                             mvb = mvb + blcc[n]*rhs1[j];
 202                         }
 203                     }
 204                     rhs2[b] = mvb;
 205                     sol[b]  = sol[b] + mvb;
 206                 }
 207                 swap = rhs1;
 208                 rhs1 = rhs2;
 209                 rhs2 = swap;
 210             }
 211         } /* flops count is missing here */
 212
 213         /* We need a barrier here as the calling routine will continue
 214          * to operate on the thread-local constraints without barrier.
 215          */
 216 #pragma omp barrier
 217     }
 218 }
 219
 220 static void lincs_update_atoms_noind(int ncons,const int *bla,
 221                                      real prefac,
 222                                      const real *fac,rvec *r,
 223                                      const real *invmass,
 224                                      rvec *x)
 225 {
 226     int  b,i,j;
 227     real mvb,im1,im2,tmp0,tmp1,tmp2;
 228
 229     if (invmass != NULL)
 230     {
 231         for(b=0; b<ncons; b++)
 232         {
 233             i = bla[2*b];
 234             j = bla[2*b+1];
 235             mvb = prefac*fac[b];
 236             im1 = invmass[i];
 237             im2 = invmass[j];
 238             tmp0 = r[b][0]*mvb;
 239             tmp1 = r[b][1]*mvb;
 240             tmp2 = r[b][2]*mvb;
 241             x[i][0] -= tmp0*im1;
 242             x[i][1] -= tmp1*im1;
 243             x[i][2] -= tmp2*im1;
 244             x[j][0] += tmp0*im2;
 245             x[j][1] += tmp1*im2;
 246             x[j][2] += tmp2*im2;
 247         } /* 16 ncons flops */
 248     }
 249     else
 250     {
 251         for(b=0; b<ncons; b++)
 252         {
 253             i = bla[2*b];
 254             j = bla[2*b+1];
 255             mvb = prefac*fac[b];
 256             tmp0 = r[b][0]*mvb;
 257             tmp1 = r[b][1]*mvb;
 258             tmp2 = r[b][2]*mvb;
 259             x[i][0] -= tmp0;
 260             x[i][1] -= tmp1;
 261             x[i][2] -= tmp2;
 262             x[j][0] += tmp0;
 263             x[j][1] += tmp1;
 264             x[j][2] += tmp2;
 265         }
 266     }
 267 }
 268
 269 static void lincs_update_atoms_ind(int ncons,const int *ind,const int *bla,
 270                                    real prefac,
 271                                    const real *fac,rvec *r,
 272                                    const real *invmass,
 273                                    rvec *x)
 274 {
 275     int  bi,b,i,j;
 276     real mvb,im1,im2,tmp0,tmp1,tmp2;
 277
 278     if (invmass != NULL)
 279     {
 280         for(bi=0; bi<ncons; bi++)
 281         {
 282             b = ind[bi];
 283             i = bla[2*b];
 284             j = bla[2*b+1];
 285             mvb = prefac*fac[b];
 286             im1 = invmass[i];
 287             im2 = invmass[j];
 288             tmp0 = r[b][0]*mvb;
 289             tmp1 = r[b][1]*mvb;
 290             tmp2 = r[b][2]*mvb;
 291             x[i][0] -= tmp0*im1;
 292             x[i][1] -= tmp1*im1;
 293             x[i][2] -= tmp2*im1;
 294             x[j][0] += tmp0*im2;
 295             x[j][1] += tmp1*im2;
 296             x[j][2] += tmp2*im2;
 297         } /* 16 ncons flops */
 298     }
 299     else
 300     {
 301         for(bi=0; bi<ncons; bi++)
 302         {
 303             b = ind[bi];
 304             i = bla[2*b];
 305             j = bla[2*b+1];
 306             mvb = prefac*fac[b];
 307             tmp0 = r[b][0]*mvb;
 308             tmp1 = r[b][1]*mvb;
 309             tmp2 = r[b][2]*mvb;
 310             x[i][0] -= tmp0;
 311             x[i][1] -= tmp1;
 312             x[i][2] -= tmp2;
 313             x[j][0] += tmp0;
 314             x[j][1] += tmp1;
 315             x[j][2] += tmp2;
 316         } /* 16 ncons flops */
 317     }
 318 }
 319
 320 static void lincs_update_atoms(struct gmx_lincsdata *li,int th,
 321                                real prefac,
 322                                const real *fac,rvec *r,
 323                                const real *invmass,
 324                                rvec *x)
 325 {
 326     if (li->nth == 1)
 327     {
 328         /* Single thread, we simply update for all constraints */
 329         lincs_update_atoms_noind(li->nc,li->bla,prefac,fac,r,invmass,x);
 330     }
 331     else
 332     {
 333         /* Update the atom vector components for our thread local
 334          * constraints that only access our local atom range.
 335          * This can be done without a barrier.
 336          */
 337         lincs_update_atoms_ind(li->th[th].nind,li->th[th].ind,
 338                                li->bla,prefac,fac,r,invmass,x);
 339
 340         if (li->th[li->nth].nind > 0)
 341         {
 342             /* Update the constraints that operate on atoms
 343              * in multiple thread atom blocks on the master thread.
 344              */
 345 #pragma omp barrier
 346 #pragma omp master
 347             {
 348                 lincs_update_atoms_ind(li->th[li->nth].nind,
 349                                        li->th[li->nth].ind,
 350                                        li->bla,prefac,fac,r,invmass,x);
 351             }
 352         }
 353     }
 354 }
 355
 356 /* LINCS projection, works on derivatives of the coordinates */
 357 static void do_lincsp(rvec *x,rvec *f,rvec *fp,t_pbc *pbc,
 358                       struct gmx_lincsdata *lincsd,int th,
 359                       real *invmass,
 360                       int econq,real *dvdlambda,
 361                       gmx_bool bCalcVir,tensor rmdf)
 362 {
 363     int     b0,b1,b,i,j,k,n;
 364     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,wfac,lam;
 365     rvec    dx;
 366     int     *bla,*blnr,*blbnb;
 367     rvec    *r;
 368     real    *blc,*blmf,*blcc,*rhs1,*rhs2,*sol;
 369
 370     b0 = lincsd->th[th].b0;
 371     b1 = lincsd->th[th].b1;
 372
 373     bla    = lincsd->bla;
 374     r      = lincsd->tmpv;
 375     blnr   = lincsd->blnr;
 376     blbnb  = lincsd->blbnb;
 377     if (econq != econqForce)
 378     {
 379         /* Use mass-weighted parameters */
 380         blc  = lincsd->blc;
 381         blmf = lincsd->blmf;
 382     }
 383     else
 384     {
 385         /* Use non mass-weighted parameters */
 386         blc  = lincsd->blc1;
 387         blmf = lincsd->blmf1;
 388     }
 389     blcc   = lincsd->tmpncc;
 390     rhs1   = lincsd->tmp1;
 391     rhs2   = lincsd->tmp2;
 392     sol    = lincsd->tmp3;
 393
 394     /* Compute normalized i-j vectors */
 395     if (pbc)
 396     {
 397         for(b=b0; b<b1; b++)
 398         {
 399             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 400             unitv(dx,r[b]);
 401         }
 402     }
 403     else
 404     {
 405         for(b=b0; b<b1; b++)
 406         {
 407             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
 408             unitv(dx,r[b]);
 409         } /* 16 ncons flops */
 410     }
 411
 412 #pragma omp barrier
 413     for(b=b0; b<b1; b++)
 414     {
 415         tmp0 = r[b][0];
 416         tmp1 = r[b][1];
 417         tmp2 = r[b][2];
 418         i = bla[2*b];
 419         j = bla[2*b+1];
 420         for(n=blnr[b]; n<blnr[b+1]; n++)
 421         {
 422             k = blbnb[n];
 423             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 424         } /* 6 nr flops */
 425         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 426                       tmp1*(f[i][1] - f[j][1]) +
 427                       tmp2*(f[i][2] - f[j][2]));
 428         rhs1[b] = mvb;
 429         sol[b]  = mvb;
 430         /* 7 flops */
 431     }
 432     /* Together: 23*ncons + 6*nrtot flops */
 433
 434     lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 435     /* nrec*(ncons+2*nrtot) flops */
 436
 437     if (econq == econqDeriv_FlexCon)
 438     {
 439         /* We only want to constraint the flexible constraints,
 440          * so we mask out the normal ones by setting sol to 0.
 441          */
 442         for(b=b0; b<b1; b++)
 443         {
 444             if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 445             {
 446                 sol[b] = 0;
 447             }
 448         }
 449     }
 450
 451     /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
 452     for(b=b0; b<b1; b++)
 453     {
 454         sol[b] *= blc[b];
 455     }
 456
 457     /* When constraining forces, we should not use mass weighting,
 458      * so we pass invmass=NULL, which results in the use of 1 for all atoms.
 459      */
 460     lincs_update_atoms(lincsd,th,1.0,sol,r,
 461                        (econq != econqForce) ? invmass : NULL,fp);
 462
 463     if (dvdlambda != NULL)
 464     {
 465 #pragma omp barrier
 466         for(b=b0; b<b1; b++)
 467         {
 468             *dvdlambda -= sol[b]*lincsd->ddist[b];
 469         }
 470         /* 10 ncons flops */
 471     }
 472
 473     if (bCalcVir)
 474     {
 475         /* Constraint virial,
 476          * determines sum r_bond x delta f,
 477          * where delta f is the constraint correction
 478          * of the quantity that is being constrained.
 479          */
 480         for(b=b0; b<b1; b++)
 481         {
 482             mvb = lincsd->bllen[b]*sol[b];
 483             for(i=0; i<DIM; i++)
 484             {
 485                 tmp1 = mvb*r[b][i];
 486                 for(j=0; j<DIM; j++)
 487                 {
 488                     rmdf[i][j] += tmp1*r[b][j];
 489                 }
 490             }
 491         } /* 23 ncons flops */
 492     }
 493 }
 494
 495 static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc,
 496                      struct gmx_lincsdata *lincsd,int th,
 497                      real *invmass,
 498                                          t_commrec *cr,
 499                      gmx_bool bCalcLambda,
 500                      real wangle,int *warn,
 501                      real invdt,rvec *v,
 502                      gmx_bool bCalcVir,tensor vir_r_m_dr)
 503 {
 504     int     b0,b1,b,i,j,k,n,iter;
 505     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac;
 506     rvec    dx;
 507     int     *bla,*blnr,*blbnb;
 508     rvec    *r;
 509     real    *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*blc_sol,*mlambda;
 510     int     *nlocat;
 511
 512     b0 = lincsd->th[th].b0;
 513     b1 = lincsd->th[th].b1;
 514
 515     bla    = lincsd->bla;
 516     r      = lincsd->tmpv;
 517     blnr   = lincsd->blnr;
 518     blbnb  = lincsd->blbnb;
 519     blc    = lincsd->blc;
 520     blmf   = lincsd->blmf;
 521     bllen  = lincsd->bllen;
 522     blcc   = lincsd->tmpncc;
 523     rhs1   = lincsd->tmp1;
 524     rhs2   = lincsd->tmp2;
 525     sol    = lincsd->tmp3;
 526     blc_sol= lincsd->tmp4;
 527     mlambda= lincsd->mlambda;
 528
 529     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 530     {
 531         nlocat = dd_constraints_nlocalatoms(cr->dd);
 532     }
 533     else if (PARTDECOMP(cr))
 534     {
 535         nlocat = pd_constraints_nlocalatoms(cr->pd);
 536     }
 537     else
 538     {
 539         nlocat = NULL;
 540     }
 541
 542     if (pbc)
 543     {
 544         /* Compute normalized i-j vectors */
 545         for(b=b0; b<b1; b++)
 546         {
 547             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 548             unitv(dx,r[b]);
 549         }
 550 #pragma omp barrier
 551         for(b=b0; b<b1; b++)
 552         {
 553             for(n=blnr[b]; n<blnr[b+1]; n++)
 554             {
 555                 blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]);
 556             }
 557             pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 558             mvb = blc[b]*(iprod(r[b],dx) - bllen[b]);
 559             rhs1[b] = mvb;
 560             sol[b]  = mvb;
 561         }
 562     }
 563     else
 564     {
 565         /* Compute normalized i-j vectors */
 566         for(b=b0; b<b1; b++)
 567         {
 568             i = bla[2*b];
 569             j = bla[2*b+1];
 570             tmp0 = x[i][0] - x[j][0];
 571             tmp1 = x[i][1] - x[j][1];
 572             tmp2 = x[i][2] - x[j][2];
 573             rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 574             r[b][0] = rlen*tmp0;
 575             r[b][1] = rlen*tmp1;
 576             r[b][2] = rlen*tmp2;
 577         } /* 16 ncons flops */
 578
 579 #pragma omp barrier
 580         for(b=b0; b<b1; b++)
 581         {
 582             tmp0 = r[b][0];
 583             tmp1 = r[b][1];
 584             tmp2 = r[b][2];
 585             len = bllen[b];
 586             i = bla[2*b];
 587             j = bla[2*b+1];
 588             for(n=blnr[b]; n<blnr[b+1]; n++)
 589             {
 590                 k = blbnb[n];
 591                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 592             } /* 6 nr flops */
 593             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 594                           tmp1*(xp[i][1] - xp[j][1]) +
 595                           tmp2*(xp[i][2] - xp[j][2]) - len);
 596             rhs1[b] = mvb;
 597             sol[b]  = mvb;
 598             /* 10 flops */
 599         }
 600         /* Together: 26*ncons + 6*nrtot flops */
 601     }
 602
 603     lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 604     /* nrec*(ncons+2*nrtot) flops */
 605
 606     for(b=b0; b<b1; b++)
 607     {
 608         mlambda[b] = blc[b]*sol[b];
 609     }
 610
 611     /* Update the coordinates */
 612     lincs_update_atoms(lincsd,th,1.0,mlambda,r,invmass,xp);
 613
 614     /*
 615      ********  Correction for centripetal effects  ********
 616      */
 617
 618     wfac = cos(DEG2RAD*wangle);
 619     wfac = wfac*wfac;
 620
 621     for(iter=0; iter<lincsd->nIter; iter++)
 622     {
 623         if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints) ||
 624             PARTDECOMP(cr))
 625         {
 626 #pragma omp barrier
 627 #pragma omp master
 628             {
 629                  /* Communicate the corrected non-local coordinates */
 630                 if (DOMAINDECOMP(cr))
 631                 {
 632                     dd_move_x_constraints(cr->dd,box,xp,NULL);
 633                 }
 634                 else
 635                 {
 636                     pd_move_x_constraints(cr,xp,NULL);
 637                 }
 638             }
 639         }
 640
 641 #pragma omp barrier
 642         for(b=b0; b<b1; b++)
 643         {
 644             len = bllen[b];
 645             if (pbc)
 646             {
 647                 pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 648             }
 649             else
 650             {
 651                 rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx);
 652             }
 653             len2 = len*len;
 654             dlen2 = 2*len2 - norm2(dx);
 655             if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b]))
 656             {
 657                 *warn = b;
 658             }
 659             if (dlen2 > 0)
 660             {
 661                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 662             }
 663             else
 664             {
 665                 mvb = blc[b]*len;
 666             }
 667             rhs1[b] = mvb;
 668             sol[b]  = mvb;
 669         } /* 20*ncons flops */
 670
 671         lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 672         /* nrec*(ncons+2*nrtot) flops */
 673
 674         for(b=b0; b<b1; b++)
 675         {
 676             mvb = blc[b]*sol[b];
 677             blc_sol[b]  = mvb;
 678             mlambda[b] += mvb;
 679         }
 680
 681         /* Update the coordinates */
 682         lincs_update_atoms(lincsd,th,1.0,blc_sol,r,invmass,xp);
 683     }
 684     /* nit*ncons*(37+9*nrec) flops */
 685
 686     if (v != NULL)
 687     {
 688         /* Update the velocities */
 689         lincs_update_atoms(lincsd,th,invdt,mlambda,r,invmass,v);
 690         /* 16 ncons flops */
 691     }
 692
 693     if (nlocat != NULL && bCalcLambda)
 694     {
 695         /* In lincs_update_atoms thread might cross-read mlambda */
 696 #pragma omp barrier
 697
 698         /* Only account for local atoms */
 699         for(b=b0; b<b1; b++)
 700         {
 701             mlambda[b] *= 0.5*nlocat[b];
 702         }
 703     }
 704
 705     if (bCalcVir)
 706     {
 707         /* Constraint virial */
 708         for(b=b0; b<b1; b++)
 709         {
 710             tmp0 = -bllen[b]*mlambda[b];
 711             for(i=0; i<DIM; i++)
 712             {
 713                 tmp1 = tmp0*r[b][i];
 714                 for(j=0; j<DIM; j++)
 715                 {
 716                     vir_r_m_dr[i][j] -= tmp1*r[b][j];
 717                 }
 718             }
 719         } /* 22 ncons flops */
 720     }
 721
 722     /* Total:
 723      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 724      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 725      *
 726      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 727      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 728      * if nit=1
 729      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 730      */
 731 }
 732
 733 void set_lincs_matrix(struct gmx_lincsdata *li,real *invmass,real lambda)
 734 {
 735     int i,a1,a2,n,k,sign,center;
 736     int end,nk,kk;
 737     const real invsqrt2=0.7071067811865475244;
 738
 739     for(i=0; (i<li->nc); i++)
 740     {
 741         a1 = li->bla[2*i];
 742         a2 = li->bla[2*i+1];
 743         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 744         li->blc1[i] = invsqrt2;
 745     }
 746
 747     /* Construct the coupling coefficient matrix blmf */
 748     li->ntriangle = 0;
 749     li->ncc_triangle = 0;
 750     for(i=0; (i<li->nc); i++)
 751     {
 752         a1 = li->bla[2*i];
 753         a2 = li->bla[2*i+1];
 754         for(n=li->blnr[i]; (n<li->blnr[i+1]); n++)
 755         {
 756             k = li->blbnb[n];
 757             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 758             {
 759                 sign = -1;
 760             }
 761             else
 762             {
 763                 sign = 1;
 764             }
 765             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 766             {
 767                 center = a1;
 768                 end    = a2;
 769             }
 770             else
 771             {
 772                 center = a2;
 773                 end    = a1;
 774             }
 775             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 776             li->blmf1[n] = sign*0.5;
 777             if (li->ncg_triangle > 0)
 778             {
 779                 /* Look for constraint triangles */
 780                 for(nk=li->blnr[k]; (nk<li->blnr[k+1]); nk++)
 781                 {
 782                     kk = li->blbnb[nk];
 783                     if (kk != i && kk != k &&
 784                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 785                     {
 786                         if (li->ntriangle == 0 ||
 787                             li->triangle[li->ntriangle-1] < i)
 788                         {
 789                             /* Add this constraint to the triangle list */
 790                             li->triangle[li->ntriangle] = i;
 791                             li->tri_bits[li->ntriangle] = 0;
 792                             li->ntriangle++;
 793                             if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 794                             {
 795                                 gmx_fatal(FARGS,"A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 796                                           li->blnr[i+1] - li->blnr[i],
 797                                           sizeof(li->tri_bits[0])*8-1);
 798                             }
 799                         }
 800                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 801                         li->ncc_triangle++;
 802                     }
 803                 }
 804             }
 805         }
 806     }
 807
 808     if (debug)
 809     {
 810         fprintf(debug,"Of the %d constraints %d participate in triangles\n",
 811                 li->nc,li->ntriangle);
 812         fprintf(debug,"There are %d couplings of which %d in triangles\n",
 813                 li->ncc,li->ncc_triangle);
 814     }
 815
 816     /* Set matlam,
 817      * so we know with which lambda value the masses have been set.
 818      */
 819     li->matlam = lambda;
 820 }
 821
 822 static int count_triangle_constraints(t_ilist *ilist,t_blocka *at2con)
 823 {
 824     int  ncon1,ncon_tot;
 825     int  c0,a00,a01,n1,c1,a10,a11,ac1,n2,c2,a20,a21;
 826     int  ncon_triangle;
 827     gmx_bool bTriangle;
 828     t_iatom *ia1,*ia2,*iap;
 829
 830     ncon1    = ilist[F_CONSTR].nr/3;
 831     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 832
 833     ia1 = ilist[F_CONSTR].iatoms;
 834     ia2 = ilist[F_CONSTRNC].iatoms;
 835
 836     ncon_triangle = 0;
 837     for(c0=0; c0<ncon_tot; c0++)
 838     {
 839         bTriangle = FALSE;
 840         iap = constr_iatomptr(ncon1,ia1,ia2,c0);
 841         a00 = iap[1];
 842         a01 = iap[2];
 843         for(n1=at2con->index[a01]; n1<at2con->index[a01+1]; n1++)
 844         {
 845             c1 = at2con->a[n1];
 846             if (c1 != c0)
 847             {
 848                 iap = constr_iatomptr(ncon1,ia1,ia2,c1);
 849                 a10 = iap[1];
 850                 a11 = iap[2];
 851                 if (a10 == a01)
 852                 {
 853                     ac1 = a11;
 854                 }
 855                 else
 856                 {
 857                     ac1 = a10;
 858                 }
 859                 for(n2=at2con->index[ac1]; n2<at2con->index[ac1+1]; n2++)
 860                 {
 861                     c2 = at2con->a[n2];
 862                     if (c2 != c0 && c2 != c1)
 863                     {
 864                         iap = constr_iatomptr(ncon1,ia1,ia2,c2);
 865                         a20 = iap[1];
 866                         a21 = iap[2];
 867                         if (a20 == a00 || a21 == a00)
 868                         {
 869                             bTriangle = TRUE;
 870                         }
 871                     }
 872                 }
 873             }
 874         }
 875         if (bTriangle)
 876         {
 877             ncon_triangle++;
 878         }
 879     }
 880
 881     return ncon_triangle;
 882 }
 883
 884 static gmx_bool more_than_two_sequential_constraints(const t_ilist *ilist,
 885                                                      const t_blocka *at2con)
 886 {
 887     t_iatom  *ia1,*ia2,*iap;
 888     int      ncon1,ncon_tot,c;
 889     int      a1,a2;
 890     gmx_bool bMoreThanTwoSequentialConstraints;
 891
 892     ncon1    = ilist[F_CONSTR].nr/3;
 893     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 894
 895     ia1 = ilist[F_CONSTR].iatoms;
 896     ia2 = ilist[F_CONSTRNC].iatoms;
 897
 898     bMoreThanTwoSequentialConstraints = FALSE;
 899     for(c=0; c<ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
 900     {
 901         iap = constr_iatomptr(ncon1,ia1,ia2,c);
 902         a1 = iap[1];
 903         a2 = iap[2];
 904         /* Check if this constraint has constraints connected at both atoms */
 905         if (at2con->index[a1+1] - at2con->index[a1] > 1 &&
 906             at2con->index[a2+1] - at2con->index[a2] > 1)
 907         {
 908             bMoreThanTwoSequentialConstraints = TRUE;
 909         }
 910     }
 911
 912     return bMoreThanTwoSequentialConstraints;
 913 }
 914
 915 static int int_comp(const void *a,const void *b)
 916 {
 917     return (*(int *)a) - (*(int *)b);
 918 }
 919
 920 gmx_lincsdata_t init_lincs(FILE *fplog,gmx_mtop_t *mtop,
 921                            int nflexcon_global,t_blocka *at2con,
 922                            gmx_bool bPLINCS,int nIter,int nProjOrder)
 923 {
 924     struct gmx_lincsdata *li;
 925     int mb;
 926     gmx_moltype_t *molt;
 927
 928     if (fplog)
 929     {
 930         fprintf(fplog,"\nInitializing%s LINear Constraint Solver\n",
 931                 bPLINCS ? " Parallel" : "");
 932     }
 933
 934     snew(li,1);
 935
 936     li->ncg      =
 937         gmx_mtop_ftype_count(mtop,F_CONSTR) +
 938         gmx_mtop_ftype_count(mtop,F_CONSTRNC);
 939     li->ncg_flex = nflexcon_global;
 940
 941     li->nIter  = nIter;
 942     li->nOrder = nProjOrder;
 943
 944     li->ncg_triangle = 0;
 945     li->bCommIter = FALSE;
 946     for(mb=0; mb<mtop->nmolblock; mb++)
 947     {
 948         molt = &mtop->moltype[mtop->molblock[mb].type];
 949         li->ncg_triangle +=
 950             mtop->molblock[mb].nmol*
 951             count_triangle_constraints(molt->ilist,
 952                                        &at2con[mtop->molblock[mb].type]);
 953         if (bPLINCS && li->bCommIter == FALSE)
 954         {
 955             /* Check if we need to communicate not only before LINCS,
 956              * but also before each iteration.
 957              * The check for only two sequential constraints is only
 958              * useful for the common case of H-bond only constraints.
 959              * With more effort we could also make it useful for small
 960              * molecules with nr. sequential constraints <= nOrder-1.
 961              */
 962             li->bCommIter = (li->nOrder < 1 || more_than_two_sequential_constraints(molt->ilist,&at2con[mtop->molblock[mb].type]));
 963         }
 964     }
 965     if (debug && bPLINCS)
 966     {
 967         fprintf(debug,"PLINCS communication before each iteration: %d\n",
 968                 li->bCommIter);
 969     }
 970
 971     /* LINCS can run on any number of threads.
 972      * Currently the number is fixed for the whole simulation,
 973      * but it could be set in set_lincs().
 974      */
 975     li->nth = gmx_omp_nthreads_get(emntLINCS);
 976     if (li->nth == 1)
 977     {
 978         snew(li->th,1);
 979     }
 980     else
 981     {
 982         /* Allocate an extra elements for "thread-overlap" constraints */
 983         snew(li->th,li->nth+1);
 984     }
 985     if (debug)
 986     {
 987         fprintf(debug,"LINCS: using %d threads\n",li->nth);
 988     }
 989
 990     if (bPLINCS || li->ncg_triangle > 0)
 991     {
 992         please_cite(fplog,"Hess2008a");
 993     }
 994     else
 995     {
 996         please_cite(fplog,"Hess97a");
 997     }
 998
 999     if (fplog)
1000     {
1001         fprintf(fplog,"The number of constraints is %d\n",li->ncg);
1002         if (bPLINCS)
1003         {
1004             fprintf(fplog,"There are inter charge-group constraints,\n"
1005                     "will communicate selected coordinates each lincs iteration\n");
1006         }
1007         if (li->ncg_triangle > 0)
1008         {
1009             fprintf(fplog,
1010                     "%d constraints are involved in constraint triangles,\n"
1011                     "will apply an additional matrix expansion of order %d for couplings\n"
1012                     "between constraints inside triangles\n",
1013                     li->ncg_triangle,li->nOrder);
1014         }
1015     }
1016
1017     return li;
1018 }
1019
1020 /* Sets up the work division over the threads */
1021 static void lincs_thread_setup(struct gmx_lincsdata *li,int natoms)
1022 {
1023     lincs_thread_t *li_m;
1024     int th;
1025     unsigned *atf;
1026     int a;
1027
1028     if (natoms > li->atf_nalloc)
1029     {
1030         li->atf_nalloc = over_alloc_large(natoms);
1031         srenew(li->atf,li->atf_nalloc);
1032     }
1033
1034     atf = li->atf;
1035     /* Clear the atom flags */
1036     for(a=0; a<natoms; a++)
1037     {
1038         atf[a] = 0;
1039     }
1040
1041     for(th=0; th<li->nth; th++)
1042     {
1043         lincs_thread_t *li_th;
1044         int b;
1045
1046         li_th = &li->th[th];
1047
1048         /* The constraints are divided equally over the threads */
1049         li_th->b0 = (li->nc* th   )/li->nth;
1050         li_th->b1 = (li->nc*(th+1))/li->nth;
1051
1052         if (th < sizeof(*atf)*8)
1053         {
1054             /* For each atom set a flag for constraints from each */
1055             for(b=li_th->b0; b<li_th->b1; b++)
1056             {
1057                 atf[li->bla[b*2]  ] |= (1U<<th);
1058                 atf[li->bla[b*2+1]] |= (1U<<th);
1059             }
1060         }
1061     }
1062
1063 #pragma omp parallel for num_threads(li->nth) schedule(static)
1064     for(th=0; th<li->nth; th++)
1065     {
1066         lincs_thread_t *li_th;
1067         unsigned mask;
1068         int b;
1069
1070         li_th = &li->th[th];
1071
1072         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1073         {
1074             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1075             srenew(li_th->ind,li_th->ind_nalloc);
1076             srenew(li_th->ind_r,li_th->ind_nalloc);
1077         }
1078
1079         if (th < sizeof(*atf)*8)
1080         {
1081             mask = (1U<<th) - 1U;
1082
1083             li_th->nind   = 0;
1084             li_th->nind_r = 0;
1085             for(b=li_th->b0; b<li_th->b1; b++)
1086             {
1087                 /* We let the constraint with the lowest thread index
1088                  * operate on atoms with constraints from multiple threads.
1089                  */
1090                 if (((atf[li->bla[b*2]]   & mask) == 0) &&
1091                     ((atf[li->bla[b*2+1]] & mask) == 0))
1092                 {
1093                     /* Add the constraint to the local atom update index */
1094                     li_th->ind[li_th->nind++] = b;
1095                 }
1096                 else
1097                 {
1098                     /* Add the constraint to the rest block */
1099                     li_th->ind_r[li_th->nind_r++] = b;
1100                 }
1101             }
1102         }
1103         else
1104         {
1105             /* We are out of bits, assign all constraints to rest */
1106             for(b=li_th->b0; b<li_th->b1; b++)
1107             {
1108                 li_th->ind_r[li_th->nind_r++] = b;
1109             }
1110         }
1111     }
1112
1113     /* We need to copy all constraints which have not be assigned
1114      * to a thread to a separate list which will be handled by one thread.
1115      */
1116     li_m = &li->th[li->nth];
1117
1118     li_m->nind = 0;
1119     for(th=0; th<li->nth; th++)
1120     {
1121         lincs_thread_t *li_th;
1122         int b;
1123
1124         li_th   = &li->th[th];
1125
1126         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1127         {
1128             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1129             srenew(li_m->ind,li_m->ind_nalloc);
1130         }
1131
1132         for(b=0; b<li_th->nind_r; b++)
1133         {
1134             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1135         }
1136
1137         if (debug)
1138         {
1139             fprintf(debug,"LINCS thread %d: %d constraints\n",
1140                     th,li_th->nind);
1141         }
1142     }
1143
1144     if (debug)
1145     {
1146         fprintf(debug,"LINCS thread r: %d constraints\n",
1147                 li_m->nind);
1148     }
1149 }
1150
1151
1152 void set_lincs(t_idef *idef,t_mdatoms *md,
1153                gmx_bool bDynamics,t_commrec *cr,
1154                struct gmx_lincsdata *li)
1155 {
1156     int      start,natoms,nflexcon;
1157     t_blocka at2con;
1158     t_iatom  *iatom;
1159     int      i,k,ncc_alloc,ni,con,nconnect,concon;
1160     int      type,a1,a2;
1161     real     lenA=0,lenB;
1162     gmx_bool     bLocal;
1163
1164     li->nc = 0;
1165     li->ncc = 0;
1166     /* Zero the thread index ranges.
1167      * Otherwise without local constraints we could return with old ranges.
1168      */
1169     for(i=0; i<li->nth; i++)
1170     {
1171         li->th[i].b0   = 0;
1172         li->th[i].b1   = 0;
1173         li->th[i].nind = 0;
1174     }
1175     if (li->nth > 1)
1176     {
1177         li->th[li->nth].nind = 0;
1178     }
1179
1180     /* This is the local topology, so there are only F_CONSTR constraints */
1181     if (idef->il[F_CONSTR].nr == 0)
1182     {
1183         /* There are no constraints,
1184          * we do not need to fill any data structures.
1185          */
1186         return;
1187     }
1188
1189     if (debug)
1190     {
1191         fprintf(debug,"Building the LINCS connectivity\n");
1192     }
1193
1194     if (DOMAINDECOMP(cr))
1195     {
1196         if (cr->dd->constraints)
1197         {
1198             dd_get_constraint_range(cr->dd,&start,&natoms);
1199         }
1200         else
1201         {
1202             natoms = cr->dd->nat_home;
1203         }
1204         start = 0;
1205     }
1206     else if(PARTDECOMP(cr))
1207         {
1208                 pd_get_constraint_range(cr->pd,&start,&natoms);
1209         }
1210         else
1211     {
1212         start  = md->start;
1213         natoms = md->homenr;
1214     }
1215     at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics,
1216                          &nflexcon);
1217
1218
1219     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1220     {
1221         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1222         srenew(li->bllen0,li->nc_alloc);
1223         srenew(li->ddist,li->nc_alloc);
1224         srenew(li->bla,2*li->nc_alloc);
1225         srenew(li->blc,li->nc_alloc);
1226         srenew(li->blc1,li->nc_alloc);
1227         srenew(li->blnr,li->nc_alloc+1);
1228         srenew(li->bllen,li->nc_alloc);
1229         srenew(li->tmpv,li->nc_alloc);
1230         srenew(li->tmp1,li->nc_alloc);
1231         srenew(li->tmp2,li->nc_alloc);
1232         srenew(li->tmp3,li->nc_alloc);
1233         srenew(li->tmp4,li->nc_alloc);
1234         srenew(li->mlambda,li->nc_alloc);
1235         if (li->ncg_triangle > 0)
1236         {
1237             /* This is allocating too much, but it is difficult to improve */
1238             srenew(li->triangle,li->nc_alloc);
1239             srenew(li->tri_bits,li->nc_alloc);
1240         }
1241     }
1242
1243     iatom = idef->il[F_CONSTR].iatoms;
1244
1245     ncc_alloc = li->ncc_alloc;
1246     li->blnr[0] = 0;
1247
1248     ni = idef->il[F_CONSTR].nr/3;
1249
1250     con = 0;
1251     nconnect = 0;
1252     li->blnr[con] = nconnect;
1253     for(i=0; i<ni; i++)
1254     {
1255         bLocal = TRUE;
1256         type = iatom[3*i];
1257         a1   = iatom[3*i+1];
1258         a2   = iatom[3*i+2];
1259         lenA = idef->iparams[type].constr.dA;
1260         lenB = idef->iparams[type].constr.dB;
1261         /* Skip the flexible constraints when not doing dynamics */
1262         if (bDynamics || lenA!=0 || lenB!=0)
1263         {
1264             li->bllen0[con]  = lenA;
1265             li->ddist[con]   = lenB - lenA;
1266             /* Set the length to the topology A length */
1267             li->bllen[con]   = li->bllen0[con];
1268             li->bla[2*con]   = a1;
1269             li->bla[2*con+1] = a2;
1270             /* Construct the constraint connection matrix blbnb */
1271             for(k=at2con.index[a1-start]; k<at2con.index[a1-start+1]; k++)
1272             {
1273                 concon = at2con.a[k];
1274                 if (concon != i)
1275                 {
1276                     if (nconnect >= ncc_alloc)
1277                     {
1278                         ncc_alloc = over_alloc_small(nconnect+1);
1279                         srenew(li->blbnb,ncc_alloc);
1280                     }
1281                     li->blbnb[nconnect++] = concon;
1282                 }
1283             }
1284             for(k=at2con.index[a2-start]; k<at2con.index[a2-start+1]; k++)
1285             {
1286                 concon = at2con.a[k];
1287                 if (concon != i)
1288                 {
1289                     if (nconnect+1 > ncc_alloc)
1290                     {
1291                         ncc_alloc = over_alloc_small(nconnect+1);
1292                         srenew(li->blbnb,ncc_alloc);
1293                     }
1294                     li->blbnb[nconnect++] = concon;
1295                 }
1296             }
1297             li->blnr[con+1] = nconnect;
1298
1299             if (cr->dd == NULL)
1300             {
1301                 /* Order the blbnb matrix to optimize memory access */
1302                 qsort(&(li->blbnb[li->blnr[con]]),li->blnr[con+1]-li->blnr[con],
1303                       sizeof(li->blbnb[0]),int_comp);
1304             }
1305             /* Increase the constraint count */
1306             con++;
1307         }
1308     }
1309
1310     done_blocka(&at2con);
1311
1312     /* This is the real number of constraints,
1313      * without dynamics the flexible constraints are not present.
1314      */
1315     li->nc = con;
1316
1317     li->ncc = li->blnr[con];
1318     if (cr->dd == NULL)
1319     {
1320         /* Since the matrix is static, we can free some memory */
1321         ncc_alloc = li->ncc;
1322         srenew(li->blbnb,ncc_alloc);
1323     }
1324
1325     if (ncc_alloc > li->ncc_alloc)
1326     {
1327         li->ncc_alloc = ncc_alloc;
1328         srenew(li->blmf,li->ncc_alloc);
1329         srenew(li->blmf1,li->ncc_alloc);
1330         srenew(li->tmpncc,li->ncc_alloc);
1331     }
1332
1333     if (debug)
1334     {
1335         fprintf(debug,"Number of constraints is %d, couplings %d\n",
1336                 li->nc,li->ncc);
1337     }
1338
1339     if (li->nth == 1)
1340     {
1341         li->th[0].b0 = 0;
1342         li->th[0].b1 = li->nc;
1343     }
1344     else
1345     {
1346         lincs_thread_setup(li,md->nr);
1347     }
1348
1349     set_lincs_matrix(li,md->invmass,md->lambda);
1350 }
1351
1352 static void lincs_warning(FILE *fplog,
1353                           gmx_domdec_t *dd,rvec *x,rvec *xprime,t_pbc *pbc,
1354                           int ncons,int *bla,real *bllen,real wangle,
1355                           int maxwarn,int *warncount)
1356 {
1357     int b,i,j;
1358     rvec v0,v1;
1359     real wfac,d0,d1,cosine;
1360     char buf[STRLEN];
1361
1362     wfac=cos(DEG2RAD*wangle);
1363
1364     sprintf(buf,"bonds that rotated more than %g degrees:\n"
1365             " atom 1 atom 2  angle  previous, current, constraint length\n",
1366             wangle);
1367     fprintf(stderr,"%s",buf);
1368     if (fplog)
1369     {
1370         fprintf(fplog,"%s",buf);
1371     }
1372
1373     for(b=0;b<ncons;b++)
1374     {
1375         i = bla[2*b];
1376         j = bla[2*b+1];
1377         if (pbc)
1378         {
1379             pbc_dx_aiuc(pbc,x[i],x[j],v0);
1380             pbc_dx_aiuc(pbc,xprime[i],xprime[j],v1);
1381         }
1382         else
1383         {
1384             rvec_sub(x[i],x[j],v0);
1385             rvec_sub(xprime[i],xprime[j],v1);
1386         }
1387         d0 = norm(v0);
1388         d1 = norm(v1);
1389         cosine = iprod(v0,v1)/(d0*d1);
1390         if (cosine < wfac)
1391         {
1392             sprintf(buf," %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1393                     ddglatnr(dd,i),ddglatnr(dd,j),
1394                     RAD2DEG*acos(cosine),d0,d1,bllen[b]);
1395             fprintf(stderr,"%s",buf);
1396             if (fplog)
1397             {
1398                 fprintf(fplog,"%s",buf);
1399             }
1400             if (!gmx_isfinite(d1))
1401             {
1402                 gmx_fatal(FARGS,"Bond length not finite.");
1403             }
1404
1405             (*warncount)++;
1406         }
1407     }
1408     if (*warncount > maxwarn)
1409     {
1410         too_many_constraint_warnings(econtLINCS,*warncount);
1411     }
1412 }
1413
1414 static void cconerr(gmx_domdec_t *dd,
1415                     int ncons,int *bla,real *bllen,rvec *x,t_pbc *pbc,
1416                     real *ncons_loc,real *ssd,real *max,int *imax)
1417 {
1418     real      len,d,ma,ssd2,r2;
1419     int       *nlocat,count,b,im;
1420     rvec      dx;
1421
1422     if (dd && dd->constraints)
1423     {
1424         nlocat = dd_constraints_nlocalatoms(dd);
1425     }
1426     else
1427     {
1428         nlocat = 0;
1429     }
1430
1431     ma = 0;
1432     ssd2 = 0;
1433     im = 0;
1434     count = 0;
1435     for(b=0;b<ncons;b++)
1436     {
1437         if (pbc)
1438         {
1439             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
1440         }
1441         else {
1442             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
1443         }
1444         r2 = norm2(dx);
1445         len = r2*gmx_invsqrt(r2);
1446         d = fabs(len/bllen[b]-1);
1447         if (d > ma && (nlocat==NULL || nlocat[b]))
1448         {
1449             ma = d;
1450             im = b;
1451         }
1452         if (nlocat == NULL)
1453         {
1454             ssd2 += d*d;
1455             count++;
1456         }
1457         else
1458         {
1459             ssd2 += nlocat[b]*d*d;
1460             count += nlocat[b];
1461         }
1462     }
1463
1464     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1465     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1466     *max = ma;
1467     *imax = im;
1468 }
1469
1470 static void dump_conf(gmx_domdec_t *dd,struct gmx_lincsdata *li,
1471                       t_blocka *at2con,
1472                       char *name,gmx_bool bAll,rvec *x,matrix box)
1473 {
1474     char str[STRLEN];
1475     FILE *fp;
1476     int  ac0,ac1,i;
1477
1478     dd_get_constraint_range(dd,&ac0,&ac1);
1479
1480     sprintf(str,"%s_%d_%d_%d.pdb",name,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
1481     fp = gmx_fio_fopen(str,"w");
1482     fprintf(fp,"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1\n",
1483             10*norm(box[XX]),10*norm(box[YY]),10*norm(box[ZZ]),
1484             90.0,90.0,90.0);
1485     for(i=0; i<ac1; i++)
1486     {
1487         if (i < dd->nat_home || (bAll && i >= ac0 && i < ac1))
1488         {
1489             fprintf(fp,"%-6s%5u  %-4.4s%3.3s %c%4d    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
1490                     "ATOM",ddglatnr(dd,i),"C","ALA",' ',i+1,
1491                     10*x[i][XX],10*x[i][YY],10*x[i][ZZ],
1492                     1.0,i<dd->nat_tot ? 0.0 : 1.0);
1493         }
1494     }
1495     if (bAll)
1496     {
1497         for(i=0; i<li->nc; i++)
1498         {
1499             fprintf(fp,"CONECT%5d%5d\n",
1500                     ddglatnr(dd,li->bla[2*i]),
1501                     ddglatnr(dd,li->bla[2*i+1]));
1502         }
1503     }
1504     gmx_fio_fclose(fp);
1505 }
1506
1507 gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
1508                          t_inputrec *ir,
1509                          gmx_large_int_t step,
1510                          struct gmx_lincsdata *lincsd,t_mdatoms *md,
1511                          t_commrec *cr,
1512                          rvec *x,rvec *xprime,rvec *min_proj,
1513                          matrix box,t_pbc *pbc,
1514                          real lambda,real *dvdlambda,
1515                          real invdt,rvec *v,
1516                          gmx_bool bCalcVir,tensor vir_r_m_dr,
1517                          int econq,
1518                          t_nrnb *nrnb,
1519                          int maxwarn,int *warncount)
1520 {
1521     char  buf[STRLEN],buf2[22],buf3[STRLEN];
1522     int   i,warn,p_imax,error;
1523     real  ncons_loc,p_ssd,p_max=0;
1524     rvec  dx;
1525     gmx_bool  bOK;
1526
1527     bOK = TRUE;
1528
1529     if (lincsd->nc == 0 && cr->dd == NULL)
1530     {
1531         if (bLog || bEner)
1532         {
1533             lincsd->rmsd_data[0] = 0;
1534             if (ir->eI == eiSD2 && v == NULL)
1535             {
1536                 i = 2;
1537             }
1538             else
1539             {
1540                 i = 1;
1541             }
1542             lincsd->rmsd_data[i] = 0;
1543         }
1544
1545         return bOK;
1546     }
1547
1548     if (econq == econqCoord)
1549     {
1550         if (ir->efep != efepNO)
1551         {
1552             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1553             {
1554                 set_lincs_matrix(lincsd,md->invmass,md->lambda);
1555             }
1556
1557             for(i=0; i<lincsd->nc; i++)
1558             {
1559                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1560             }
1561         }
1562
1563         if (lincsd->ncg_flex)
1564         {
1565             /* Set the flexible constraint lengths to the old lengths */
1566             if (pbc != NULL)
1567             {
1568                 for(i=0; i<lincsd->nc; i++)
1569                 {
1570                     if (lincsd->bllen[i] == 0) {
1571                         pbc_dx_aiuc(pbc,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx);
1572                         lincsd->bllen[i] = norm(dx);
1573                     }
1574                 }
1575             }
1576             else
1577             {
1578                 for(i=0; i<lincsd->nc; i++)
1579                 {
1580                     if (lincsd->bllen[i] == 0)
1581                     {
1582                         lincsd->bllen[i] =
1583                             sqrt(distance2(x[lincsd->bla[2*i]],
1584                                            x[lincsd->bla[2*i+1]]));
1585                     }
1586                 }
1587             }
1588         }
1589
1590         if (bLog && fplog)
1591         {
1592             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1593                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1594         }
1595
1596         /* This warn var can be updated by multiple threads
1597          * at the same time. But as we only need to detect
1598          * if a warning occured or not, this is not an issue.
1599          */
1600         warn = -1;
1601
1602         /* The OpenMP parallel region of constrain_lincs for coords */
1603 #pragma omp parallel num_threads(lincsd->nth)
1604         {
1605             int th=gmx_omp_get_thread_num();
1606
1607             clear_mat(lincsd->th[th].vir_r_m_dr);
1608
1609             do_lincs(x,xprime,box,pbc,lincsd,th,
1610                      md->invmass,cr,
1611                      bCalcVir || (ir->efep != efepNO),
1612                      ir->LincsWarnAngle,&warn,
1613                      invdt,v,bCalcVir,
1614                      th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1615         }
1616
1617         if (ir->efep != efepNO)
1618         {
1619             real dt_2,dvdl=0;
1620
1621             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
1622             for(i=0; (i<lincsd->nc); i++)
1623             {
1624                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
1625             }
1626             *dvdlambda += dvdl;
1627                 }
1628
1629         if (bLog && fplog && lincsd->nc > 0)
1630         {
1631             fprintf(fplog,"   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1632             fprintf(fplog,"       Before LINCS          %.6f    %.6f %6d %6d\n",
1633                     sqrt(p_ssd/ncons_loc),p_max,
1634                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1635                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1636         }
1637         if (bLog || bEner)
1638         {
1639             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1640                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1641             /* Check if we are doing the second part of SD */
1642             if (ir->eI == eiSD2 && v == NULL)
1643             {
1644                 i = 2;
1645             }
1646             else
1647             {
1648                 i = 1;
1649             }
1650             lincsd->rmsd_data[0] = ncons_loc;
1651             lincsd->rmsd_data[i] = p_ssd;
1652         }
1653         else
1654         {
1655             lincsd->rmsd_data[0] = 0;
1656             lincsd->rmsd_data[1] = 0;
1657             lincsd->rmsd_data[2] = 0;
1658         }
1659         if (bLog && fplog && lincsd->nc > 0)
1660         {
1661             fprintf(fplog,
1662                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1663                     sqrt(p_ssd/ncons_loc),p_max,
1664                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1665                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1666         }
1667
1668         if (warn >= 0)
1669         {
1670             if (maxwarn >= 0)
1671             {
1672                 cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1673                         &ncons_loc,&p_ssd,&p_max,&p_imax);
1674                 if (MULTISIM(cr))
1675                 {
1676                     sprintf(buf3," in simulation %d", cr->ms->sim);
1677                 }
1678                 else
1679                 {
1680                     buf3[0] = 0;
1681                 }
1682                 sprintf(buf,"\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1683                         "relative constraint deviation after LINCS:\n"
1684                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1685                         gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t,
1686                         buf3,
1687                         sqrt(p_ssd/ncons_loc),p_max,
1688                         ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1689                         ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1690                 if (fplog)
1691                 {
1692                     fprintf(fplog,"%s",buf);
1693                 }
1694                 fprintf(stderr,"%s",buf);
1695                 lincs_warning(fplog,cr->dd,x,xprime,pbc,
1696                               lincsd->nc,lincsd->bla,lincsd->bllen,
1697                               ir->LincsWarnAngle,maxwarn,warncount);
1698             }
1699             bOK = (p_max < 0.5);
1700         }
1701
1702         if (lincsd->ncg_flex) {
1703             for(i=0; (i<lincsd->nc); i++)
1704                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1705                     lincsd->bllen[i] = 0;
1706         }
1707     }
1708     else
1709     {
1710         /* The OpenMP parallel region of constrain_lincs for derivatives */
1711 #pragma omp parallel num_threads(lincsd->nth)
1712         {
1713             int th=gmx_omp_get_thread_num();
1714
1715             do_lincsp(x,xprime,min_proj,pbc,lincsd,th,
1716                       md->invmass,econq,ir->efep != efepNO ? dvdlambda : NULL,
1717                       bCalcVir,th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1718         }
1719     }
1720
1721     if (bCalcVir && lincsd->nth > 1)
1722     {
1723         for(i=1; i<lincsd->nth; i++)
1724         {
1725             m_add(vir_r_m_dr,lincsd->th[i].vir_r_m_dr,vir_r_m_dr);
1726         }
1727     }
1728
1729     /* count assuming nit=1 */
1730     inc_nrnb(nrnb,eNR_LINCS,lincsd->nc);
1731     inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc);
1732     if (lincsd->ntriangle > 0)
1733     {
1734         inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle);
1735     }
1736     if (v)
1737     {
1738         inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2);
1739     }
1740     if (bCalcVir)
1741     {
1742         inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc);
1743     }
1744
1745     return bOK;
1746 }