src/gromacs/mdlib/clincs.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15  *
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 /* This file is completely threadsafe - keep it that way! */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include "main.h"
  43 #include "constr.h"
  44 #include "copyrite.h"
  45 #include "physics.h"
  46 #include "vec.h"
  47 #include "pbc.h"
  48 #include "smalloc.h"
  49 #include "mdrun.h"
  50 #include "nrnb.h"
  51 #include "domdec.h"
  52 #include "partdec.h"
  53 #include "mtop_util.h"
  54 #include "gmxfio.h"
  55 #include "gmx_omp_nthreads.h"
  56 #include "gmx_omp.h"
  57
  58 typedef struct {
  59     int b0;           /* first constraint for this thread */
  60     int b1;           /* b1-1 is the last constraint for this thread */
  61     int nind;         /* number of indices */
  62     int *ind;         /* constraint index for updating atom data */
  63     int nind_r;       /* number of indices */
  64     int *ind_r;       /* constraint index for updating atom data */
  65     int ind_nalloc;   /* allocation size of ind and ind_r */
  66 } lincs_thread_t;
  67
  68 typedef struct gmx_lincsdata {
  69     int  ncg;         /* the global number of constraints */
  70     int  ncg_flex;    /* the global number of flexible constraints */
  71     int  ncg_triangle;/* the global number of constraints in triangles */
  72     int  nIter;       /* the number of iterations */
  73     int  nOrder;      /* the order of the matrix expansion */
  74     int  nc;          /* the number of constraints */
  75     int  nc_alloc;    /* the number we allocated memory for */
  76     int  ncc;         /* the number of constraint connections */
  77     int  ncc_alloc;   /* the number we allocated memory for */
  78     real matlam;      /* the FE lambda value used for filling blc and blmf */
  79     real *bllen0;     /* the reference distance in topology A */
  80     real *ddist;      /* the reference distance in top B - the r.d. in top A */
  81     int  *bla;        /* the atom pairs involved in the constraints */
  82     real *blc;        /* 1/sqrt(invmass1 + invmass2) */
  83     real *blc1;       /* as blc, but with all masses 1 */
  84     int  *blnr;       /* index into blbnb and blmf */
  85     int  *blbnb;      /* list of constraint connections */
  86     int  ntriangle;   /* the local number of constraints in triangles */
  87     int  *triangle;   /* the list of triangle constraints */
  88     int  *tri_bits;   /* the bits tell if the matrix element should be used */
  89     int  ncc_triangle;/* the number of constraint connections in triangles */
  90     real *blmf;       /* matrix of mass factors for constraint connections */
  91     real *blmf1;      /* as blmf, but with all masses 1 */
  92     real *bllen;      /* the reference bond length */
  93     int  nth;         /* The number of threads doing LINCS */
  94     lincs_thread_t *th; /* LINCS thread division */
  95     unsigned *atf;    /* atom flags for thread parallelization */
  96     int  atf_nalloc;  /* allocation size of atf */
  97     /* arrays for temporary storage in the LINCS algorithm */
  98     rvec *tmpv;
  99     real *tmpncc;
 100     real *tmp1;
 101     real *tmp2;
 102     real *tmp3;
 103     real *tmp4;
 104     real *mlambda;  /* the Lagrange multipliers * -1 */
 105     /* storage for the constraint RMS relative deviation output */
 106     real rmsd_data[3];
 107 } t_gmx_lincsdata;
 108
 109 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 110 {
 111     return lincsd->rmsd_data;
 112 }
 113
 114 real lincs_rmsd(struct gmx_lincsdata *lincsd,gmx_bool bSD2)
 115 {
 116     if (lincsd->rmsd_data[0] > 0)
 117     {
 118         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 119     }
 120     else
 121     {
 122         return 0;
 123     }
 124 }
 125
 126 /* Do a set of nrec LINCS matrix multiplications.
 127  * This function will return with up to date thread-local
 128  * constraint data, without an OpenMP barrier.
 129  */
 130 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 131                                 int b0,int b1,
 132                                 const real *blcc,
 133                                 real *rhs1,real *rhs2,real *sol)
 134 {
 135     int  nrec,rec,b,j,n,nr0,nr1;
 136     real mvb,*swap;
 137     int  ntriangle,tb,bits;
 138     const int *blnr=lincsd->blnr,*blbnb=lincsd->blbnb;
 139     const int *triangle=lincsd->triangle,*tri_bits=lincsd->tri_bits;
 140
 141     ntriangle = lincsd->ntriangle;
 142     nrec      = lincsd->nOrder;
 143
 144     for(rec=0; rec<nrec; rec++)
 145     {
 146 #pragma omp barrier
 147         for(b=b0; b<b1; b++)
 148         {
 149             mvb = 0;
 150             for(n=blnr[b]; n<blnr[b+1]; n++)
 151             {
 152                 j = blbnb[n];
 153                 mvb = mvb + blcc[n]*rhs1[j];
 154             }
 155             rhs2[b] = mvb;
 156             sol[b]  = sol[b] + mvb;
 157         }
 158         swap = rhs1;
 159         rhs1 = rhs2;
 160         rhs2 = swap;
 161     } /* nrec*(ncons+2*nrtot) flops */
 162
 163     if (ntriangle > 0)
 164     {
 165         /* Perform an extra nrec recursions for only the constraints
 166          * involved in rigid triangles.
 167          * In this way their accuracy should come close to those of the other
 168          * constraints, since traingles of constraints can produce eigenvalues
 169          * around 0.7, while the effective eigenvalue for bond constraints
 170          * is around 0.4 (and 0.7*0.7=0.5).
 171          */
 172         /* We need to copy the temporary array, since only the elements
 173          * for constraints involved in triangles are updated and then
 174          * the pointers are swapped. This saving copying the whole arrary.
 175          * We need barrier as other threads might still be reading from rhs2.
 176          */
 177 #pragma omp barrier
 178         for(b=b0; b<b1; b++)
 179         {
 180             rhs2[b] = rhs1[b];
 181         }
 182 #pragma omp barrier
 183 #pragma omp master
 184         {
 185             for(rec=0; rec<nrec; rec++)
 186             {
 187                 for(tb=0; tb<ntriangle; tb++)
 188                 {
 189                     b    = triangle[tb];
 190                     bits = tri_bits[tb];
 191                     mvb = 0;
 192                     nr0 = blnr[b];
 193                     nr1 = blnr[b+1];
 194                     for(n=nr0; n<nr1; n++)
 195                     {
 196                         if (bits & (1<<(n-nr0)))
 197                         {
 198                             j = blbnb[n];
 199                             mvb = mvb + blcc[n]*rhs1[j];
 200                         }
 201                     }
 202                     rhs2[b] = mvb;
 203                     sol[b]  = sol[b] + mvb;
 204                 }
 205                 swap = rhs1;
 206                 rhs1 = rhs2;
 207                 rhs2 = swap;
 208             }
 209         } /* flops count is missing here */
 210
 211         /* We need a barrier here as the calling routine will continue
 212          * to operate on the thread-local constraints without barrier.
 213          */
 214 #pragma omp barrier
 215     }
 216 }
 217
 218 static void lincs_update_atoms_noind(int ncons,const int *bla,
 219                                      real prefac,
 220                                      const real *fac,rvec *r,
 221                                      const real *invmass,
 222                                      rvec *x)
 223 {
 224     int  b,i,j;
 225     real mvb,im1,im2,tmp0,tmp1,tmp2;
 226
 227     for(b=0; b<ncons; b++)
 228     {
 229         i = bla[2*b];
 230         j = bla[2*b+1];
 231         mvb = prefac*fac[b];
 232         im1 = invmass[i];
 233         im2 = invmass[j];
 234         tmp0 = r[b][0]*mvb;
 235         tmp1 = r[b][1]*mvb;
 236         tmp2 = r[b][2]*mvb;
 237         x[i][0] -= tmp0*im1;
 238         x[i][1] -= tmp1*im1;
 239         x[i][2] -= tmp2*im1;
 240         x[j][0] += tmp0*im2;
 241         x[j][1] += tmp1*im2;
 242         x[j][2] += tmp2*im2;
 243     } /* 16 ncons flops */
 244 }
 245
 246 static void lincs_update_atoms_ind(int ncons,const int *ind,const int *bla,
 247                                    real prefac,
 248                                    const real *fac,rvec *r,
 249                                    const real *invmass,
 250                                    rvec *x)
 251 {
 252     int  bi,b,i,j;
 253     real mvb,im1,im2,tmp0,tmp1,tmp2;
 254
 255     for(bi=0; bi<ncons; bi++)
 256     {
 257         b = ind[bi];
 258         i = bla[2*b];
 259         j = bla[2*b+1];
 260         mvb = prefac*fac[b];
 261         im1 = invmass[i];
 262         im2 = invmass[j];
 263         tmp0 = r[b][0]*mvb;
 264         tmp1 = r[b][1]*mvb;
 265         tmp2 = r[b][2]*mvb;
 266         x[i][0] -= tmp0*im1;
 267         x[i][1] -= tmp1*im1;
 268         x[i][2] -= tmp2*im1;
 269         x[j][0] += tmp0*im2;
 270         x[j][1] += tmp1*im2;
 271         x[j][2] += tmp2*im2;
 272     } /* 16 ncons flops */
 273 }
 274
 275 static void lincs_update_atoms(struct gmx_lincsdata *li,int th,
 276                                real prefac,
 277                                const real *fac,rvec *r,
 278                                const real *invmass,
 279                                rvec *x)
 280 {
 281     if (li->nth == 1)
 282     {
 283         /* Single thread, we simply update for all constraints */
 284         lincs_update_atoms_noind(li->nc,li->bla,prefac,fac,r,invmass,x);
 285     }
 286     else
 287     {
 288         /* Update the atom vector components for our thread local
 289          * constraints that only access our local atom range.
 290          * This can be done without a barrier.
 291          */
 292         lincs_update_atoms_ind(li->th[th].nind,li->th[th].ind,
 293                                li->bla,prefac,fac,r,invmass,x);
 294
 295         if (li->th[li->nth].nind > 0)
 296         {
 297             /* Update the constraints that operate on atoms
 298              * in multiple thread atom blocks on the master thread.
 299              */
 300 #pragma omp barrier
 301 #pragma omp master
 302             {
 303                 lincs_update_atoms_ind(li->th[li->nth].nind,
 304                                        li->th[li->nth].ind,
 305                                        li->bla,prefac,fac,r,invmass,x);
 306             }
 307         }
 308     }
 309 }
 310
 311 static void do_lincsp(rvec *x,rvec *f,rvec *fp,t_pbc *pbc,
 312                       struct gmx_lincsdata *lincsd,real *invmass,
 313                       int econq,real *dvdlambda,
 314                       gmx_bool bCalcVir,tensor rmdf)
 315 {
 316     int     b,i,j,k,n;
 317     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,wfac,lam;
 318     rvec    dx;
 319     int     ncons,*bla,*blnr,*blbnb;
 320     rvec    *r;
 321     real    *blc,*blmf,*blcc,*rhs1,*rhs2,*sol;
 322
 323     ncons  = lincsd->nc;
 324     bla    = lincsd->bla;
 325     r      = lincsd->tmpv;
 326     blnr   = lincsd->blnr;
 327     blbnb  = lincsd->blbnb;
 328     if (econq != econqForce)
 329     {
 330         /* Use mass-weighted parameters */
 331         blc  = lincsd->blc;
 332         blmf = lincsd->blmf;
 333     }
 334     else
 335     {
 336         /* Use non mass-weighted parameters */
 337         blc  = lincsd->blc1;
 338         blmf = lincsd->blmf1;
 339     }
 340     blcc   = lincsd->tmpncc;
 341     rhs1   = lincsd->tmp1;
 342     rhs2   = lincsd->tmp2;
 343     sol    = lincsd->tmp3;
 344
 345     if (econq != econqForce)
 346     {
 347         dvdlambda = NULL;
 348     }
 349
 350     /* Compute normalized i-j vectors */
 351     if (pbc)
 352     {
 353         for(b=0; b<ncons; b++)
 354         {
 355             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 356             unitv(dx,r[b]);
 357         }
 358     }
 359     else
 360     {
 361         for(b=0; b<ncons; b++)
 362         {
 363             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
 364             unitv(dx,r[b]);
 365         } /* 16 ncons flops */
 366     }
 367
 368     for(b=0; b<ncons; b++)
 369     {
 370         tmp0 = r[b][0];
 371         tmp1 = r[b][1];
 372         tmp2 = r[b][2];
 373         i = bla[2*b];
 374         j = bla[2*b+1];
 375         for(n=blnr[b]; n<blnr[b+1]; n++)
 376         {
 377             k = blbnb[n];
 378             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 379         } /* 6 nr flops */
 380         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 381                       tmp1*(f[i][1] - f[j][1]) +
 382                       tmp2*(f[i][2] - f[j][2]));
 383         rhs1[b] = mvb;
 384         sol[b]  = mvb;
 385         /* 7 flops */
 386     }
 387     /* Together: 23*ncons + 6*nrtot flops */
 388
 389     lincs_matrix_expand(lincsd,0,ncons,blcc,rhs1,rhs2,sol);
 390     /* nrec*(ncons+2*nrtot) flops */
 391
 392     if (econq != econqForce)
 393     {
 394         for(b=0; b<ncons; b++)
 395         {
 396             /* With econqDeriv_FlexCon only use the flexible constraints */
 397             if (econq != econqDeriv_FlexCon ||
 398                 (lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 399             {
 400                 i = bla[2*b];
 401                 j = bla[2*b+1];
 402                 mvb = blc[b]*sol[b];
 403                 im1 = invmass[i];
 404                 im2 = invmass[j];
 405                 tmp0 = r[b][0]*mvb;
 406                 tmp1 = r[b][1]*mvb;
 407                 tmp2 = r[b][2]*mvb;
 408                 fp[i][0] -= tmp0*im1;
 409                 fp[i][1] -= tmp1*im1;
 410                 fp[i][2] -= tmp2*im1;
 411                 fp[j][0] += tmp0*im2;
 412                 fp[j][1] += tmp1*im2;
 413                 fp[j][2] += tmp2*im2;
 414                 if (dvdlambda)
 415                 {
 416                     /* This is only correct with forces and invmass=1 */
 417                     *dvdlambda -= mvb*lincsd->ddist[b];
 418                 }
 419             }
 420         } /* 16 ncons flops */
 421     }
 422     else
 423     {
 424         for(b=0; b<ncons; b++)
 425         {
 426             i = bla[2*b];
 427             j = bla[2*b+1];
 428             mvb = blc[b]*sol[b];
 429             tmp0 = r[b][0]*mvb;
 430             tmp1 = r[b][1]*mvb;
 431             tmp2 = r[b][2]*mvb;
 432             fp[i][0] -= tmp0;
 433             fp[i][1] -= tmp1;
 434             fp[i][2] -= tmp2;
 435             fp[j][0] += tmp0;
 436             fp[j][1] += tmp1;
 437             fp[j][2] += tmp2;
 438             if (dvdlambda)
 439             {
 440                 *dvdlambda -= mvb*lincsd->ddist[b];
 441             }
 442         }
 443         /* 10 ncons flops */
 444     }
 445
 446     if (bCalcVir)
 447     {
 448         /* Constraint virial,
 449          * determines sum r_bond x delta f,
 450          * where delta f is the constraint correction
 451          * of the quantity that is being constrained.
 452          */
 453         for(b=0; b<ncons; b++)
 454         {
 455             mvb = lincsd->bllen[b]*blc[b]*sol[b];
 456             for(i=0; i<DIM; i++)
 457             {
 458                 tmp1 = mvb*r[b][i];
 459                 for(j=0; j<DIM; j++)
 460                 {
 461                     rmdf[i][j] += tmp1*r[b][j];
 462                 }
 463             }
 464         } /* 23 ncons flops */
 465     }
 466 }
 467
 468 static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc,
 469                      struct gmx_lincsdata *lincsd,int th,
 470                      real *invmass,
 471                                          t_commrec *cr,
 472                      gmx_bool bCalcLambda,
 473                      real wangle,int *warn,
 474                      real invdt,rvec *v,
 475                      gmx_bool bCalcVir,tensor rmdr)
 476 {
 477     int     b0,b1,b,i,j,k,n,iter;
 478     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac;
 479     rvec    dx;
 480     int     ncons,*bla,*blnr,*blbnb;
 481     rvec    *r;
 482     real    *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*blc_sol,*mlambda;
 483     int     *nlocat;
 484
 485     b0 = lincsd->th[th].b0;
 486     b1 = lincsd->th[th].b1;
 487
 488     ncons  = lincsd->nc;
 489     bla    = lincsd->bla;
 490     r      = lincsd->tmpv;
 491     blnr   = lincsd->blnr;
 492     blbnb  = lincsd->blbnb;
 493     blc    = lincsd->blc;
 494     blmf   = lincsd->blmf;
 495     bllen  = lincsd->bllen;
 496     blcc   = lincsd->tmpncc;
 497     rhs1   = lincsd->tmp1;
 498     rhs2   = lincsd->tmp2;
 499     sol    = lincsd->tmp3;
 500     blc_sol= lincsd->tmp4;
 501     mlambda= lincsd->mlambda;
 502
 503     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 504     {
 505         nlocat = dd_constraints_nlocalatoms(cr->dd);
 506     }
 507     else if (PARTDECOMP(cr))
 508     {
 509         nlocat = pd_constraints_nlocalatoms(cr->pd);
 510     }
 511     else
 512     {
 513         nlocat = NULL;
 514     }
 515
 516     *warn = 0;
 517
 518     if (pbc)
 519     {
 520         /* Compute normalized i-j vectors */
 521         for(b=b0; b<b1; b++)
 522         {
 523             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 524             unitv(dx,r[b]);
 525         }
 526 #pragma omp barrier
 527         for(b=b0; b<b1; b++)
 528         {
 529             for(n=blnr[b]; n<blnr[b+1]; n++)
 530             {
 531                 blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]);
 532             }
 533             pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 534             mvb = blc[b]*(iprod(r[b],dx) - bllen[b]);
 535             rhs1[b] = mvb;
 536             sol[b]  = mvb;
 537         }
 538     }
 539     else
 540     {
 541         /* Compute normalized i-j vectors */
 542         for(b=b0; b<b1; b++)
 543         {
 544             i = bla[2*b];
 545             j = bla[2*b+1];
 546             tmp0 = x[i][0] - x[j][0];
 547             tmp1 = x[i][1] - x[j][1];
 548             tmp2 = x[i][2] - x[j][2];
 549             rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 550             r[b][0] = rlen*tmp0;
 551             r[b][1] = rlen*tmp1;
 552             r[b][2] = rlen*tmp2;
 553         } /* 16 ncons flops */
 554
 555 #pragma omp barrier
 556         for(b=b0; b<b1; b++)
 557         {
 558             tmp0 = r[b][0];
 559             tmp1 = r[b][1];
 560             tmp2 = r[b][2];
 561             len = bllen[b];
 562             i = bla[2*b];
 563             j = bla[2*b+1];
 564             for(n=blnr[b]; n<blnr[b+1]; n++)
 565             {
 566                 k = blbnb[n];
 567                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 568             } /* 6 nr flops */
 569             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 570                           tmp1*(xp[i][1] - xp[j][1]) +
 571                           tmp2*(xp[i][2] - xp[j][2]) - len);
 572             rhs1[b] = mvb;
 573             sol[b]  = mvb;
 574             /* 10 flops */
 575         }
 576         /* Together: 26*ncons + 6*nrtot flops */
 577     }
 578
 579     lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 580     /* nrec*(ncons+2*nrtot) flops */
 581
 582     for(b=b0; b<b1; b++)
 583     {
 584         mlambda[b] = blc[b]*sol[b];
 585     }
 586
 587     /* Update the coordinates */
 588     lincs_update_atoms(lincsd,th,1.0,mlambda,r,invmass,xp);
 589
 590     /*
 591      ********  Correction for centripetal effects  ********
 592      */
 593
 594     wfac = cos(DEG2RAD*wangle);
 595     wfac = wfac*wfac;
 596
 597     for(iter=0; iter<lincsd->nIter; iter++)
 598     {
 599         if ((DOMAINDECOMP(cr) && cr->dd->constraints) ||
 600             PARTDECOMP(cr))
 601         {
 602 #pragma omp barrier
 603 #pragma omp master
 604             {
 605                  /* Communicate the corrected non-local coordinates */
 606                 if (DOMAINDECOMP(cr))
 607                 {
 608                     dd_move_x_constraints(cr->dd,box,xp,NULL);
 609                 }
 610                 else
 611                 {
 612                     pd_move_x_constraints(cr,xp,NULL);
 613                 }
 614             }
 615         }
 616
 617 #pragma omp barrier
 618         for(b=b0; b<b1; b++)
 619         {
 620             len = bllen[b];
 621             if (pbc)
 622             {
 623                 pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 624             }
 625             else
 626             {
 627                 rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx);
 628             }
 629             len2 = len*len;
 630             dlen2 = 2*len2 - norm2(dx);
 631             if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b]))
 632             {
 633                 *warn = b;
 634             }
 635             if (dlen2 > 0)
 636             {
 637                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 638             }
 639             else
 640             {
 641                 mvb = blc[b]*len;
 642             }
 643             rhs1[b] = mvb;
 644             sol[b]  = mvb;
 645         } /* 20*ncons flops */
 646
 647         lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 648         /* nrec*(ncons+2*nrtot) flops */
 649
 650         for(b=b0; b<b1; b++)
 651         {
 652             mvb = blc[b]*sol[b];
 653             blc_sol[b]  = mvb;
 654             mlambda[b] += mvb;
 655         }
 656
 657         /* Update the coordinates */
 658         lincs_update_atoms(lincsd,th,1.0,blc_sol,r,invmass,xp);
 659     }
 660     /* nit*ncons*(37+9*nrec) flops */
 661
 662     if (v != NULL)
 663     {
 664         /* Update the velocities */
 665         lincs_update_atoms(lincsd,th,invdt,mlambda,r,invmass,v);
 666         /* 16 ncons flops */
 667     }
 668
 669     if (nlocat && bCalcLambda)
 670     {
 671         /* Only account for local atoms */
 672         for(b=b0; b<b1; b++)
 673         {
 674             mlambda[b] *= 0.5*nlocat[b];
 675         }
 676 #pragma omp barrier
 677     }
 678
 679     if (bCalcVir)
 680     {
 681 #pragma omp master
 682         {
 683             /* Constraint virial */
 684             for(b=0; b<ncons; b++)
 685             {
 686                 tmp0 = -bllen[b]*mlambda[b];
 687                 for(i=0; i<DIM; i++)
 688                 {
 689                     tmp1 = tmp0*r[b][i];
 690                     for(j=0; j<DIM; j++)
 691                     {
 692                         rmdr[i][j] -= tmp1*r[b][j];
 693                     }
 694                 }
 695             } /* 22 ncons flops */
 696         }
 697     }
 698
 699     /* Total:
 700      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 701      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 702      *
 703      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 704      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 705      * if nit=1
 706      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 707      */
 708 }
 709
 710 void set_lincs_matrix(struct gmx_lincsdata *li,real *invmass,real lambda)
 711 {
 712     int i,a1,a2,n,k,sign,center;
 713     int end,nk,kk;
 714     const real invsqrt2=0.7071067811865475244;
 715
 716     for(i=0; (i<li->nc); i++)
 717     {
 718         a1 = li->bla[2*i];
 719         a2 = li->bla[2*i+1];
 720         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 721         li->blc1[i] = invsqrt2;
 722     }
 723
 724     /* Construct the coupling coefficient matrix blmf */
 725     li->ntriangle = 0;
 726     li->ncc_triangle = 0;
 727     for(i=0; (i<li->nc); i++)
 728     {
 729         a1 = li->bla[2*i];
 730         a2 = li->bla[2*i+1];
 731         for(n=li->blnr[i]; (n<li->blnr[i+1]); n++)
 732         {
 733             k = li->blbnb[n];
 734             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 735             {
 736                 sign = -1;
 737             }
 738             else
 739             {
 740                 sign = 1;
 741             }
 742             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 743             {
 744                 center = a1;
 745                 end    = a2;
 746             }
 747             else
 748             {
 749                 center = a2;
 750                 end    = a1;
 751             }
 752             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 753             li->blmf1[n] = sign*0.5;
 754             if (li->ncg_triangle > 0)
 755             {
 756                 /* Look for constraint triangles */
 757                 for(nk=li->blnr[k]; (nk<li->blnr[k+1]); nk++)
 758                 {
 759                     kk = li->blbnb[nk];
 760                     if (kk != i && kk != k &&
 761                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 762                     {
 763                         if (li->ntriangle == 0 ||
 764                             li->triangle[li->ntriangle-1] < i)
 765                         {
 766                             /* Add this constraint to the triangle list */
 767                             li->triangle[li->ntriangle] = i;
 768                             li->tri_bits[li->ntriangle] = 0;
 769                             li->ntriangle++;
 770                             if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 771                             {
 772                                 gmx_fatal(FARGS,"A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 773                                           li->blnr[i+1] - li->blnr[i],
 774                                           sizeof(li->tri_bits[0])*8-1);
 775                             }
 776                         }
 777                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 778                         li->ncc_triangle++;
 779                     }
 780                 }
 781             }
 782         }
 783     }
 784
 785     if (debug)
 786     {
 787         fprintf(debug,"Of the %d constraints %d participate in triangles\n",
 788                 li->nc,li->ntriangle);
 789         fprintf(debug,"There are %d couplings of which %d in triangles\n",
 790                 li->ncc,li->ncc_triangle);
 791     }
 792
 793     /* Set matlam,
 794      * so we know with which lambda value the masses have been set.
 795      */
 796     li->matlam = lambda;
 797 }
 798
 799 static int count_triangle_constraints(t_ilist *ilist,t_blocka *at2con)
 800 {
 801     int  ncon1,ncon_tot;
 802     int  c0,a00,a01,n1,c1,a10,a11,ac1,n2,c2,a20,a21;
 803     int  ncon_triangle;
 804     gmx_bool bTriangle;
 805     t_iatom *ia1,*ia2,*iap;
 806
 807     ncon1    = ilist[F_CONSTR].nr/3;
 808     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 809
 810     ia1 = ilist[F_CONSTR].iatoms;
 811     ia2 = ilist[F_CONSTRNC].iatoms;
 812
 813     ncon_triangle = 0;
 814     for(c0=0; c0<ncon_tot; c0++)
 815     {
 816         bTriangle = FALSE;
 817         iap = constr_iatomptr(ncon1,ia1,ia2,c0);
 818         a00 = iap[1];
 819         a01 = iap[2];
 820         for(n1=at2con->index[a01]; n1<at2con->index[a01+1]; n1++)
 821         {
 822             c1 = at2con->a[n1];
 823             if (c1 != c0)
 824             {
 825                 iap = constr_iatomptr(ncon1,ia1,ia2,c1);
 826                 a10 = iap[1];
 827                 a11 = iap[2];
 828                 if (a10 == a01)
 829                 {
 830                     ac1 = a11;
 831                 }
 832                 else
 833                 {
 834                     ac1 = a10;
 835                 }
 836                 for(n2=at2con->index[ac1]; n2<at2con->index[ac1+1]; n2++)
 837                 {
 838                     c2 = at2con->a[n2];
 839                     if (c2 != c0 && c2 != c1)
 840                     {
 841                         iap = constr_iatomptr(ncon1,ia1,ia2,c2);
 842                         a20 = iap[1];
 843                         a21 = iap[2];
 844                         if (a20 == a00 || a21 == a00)
 845                         {
 846                             bTriangle = TRUE;
 847                         }
 848                     }
 849                 }
 850             }
 851         }
 852         if (bTriangle)
 853         {
 854             ncon_triangle++;
 855         }
 856     }
 857
 858     return ncon_triangle;
 859 }
 860
 861 static int int_comp(const void *a,const void *b)
 862 {
 863     return (*(int *)a) - (*(int *)b);
 864 }
 865
 866 gmx_lincsdata_t init_lincs(FILE *fplog,gmx_mtop_t *mtop,
 867                            int nflexcon_global,t_blocka *at2con,
 868                            gmx_bool bPLINCS,int nIter,int nProjOrder)
 869 {
 870     struct gmx_lincsdata *li;
 871     int mb;
 872     gmx_moltype_t *molt;
 873
 874     if (fplog)
 875     {
 876         fprintf(fplog,"\nInitializing%s LINear Constraint Solver\n",
 877                 bPLINCS ? " Parallel" : "");
 878     }
 879
 880     snew(li,1);
 881
 882     li->ncg      =
 883         gmx_mtop_ftype_count(mtop,F_CONSTR) +
 884         gmx_mtop_ftype_count(mtop,F_CONSTRNC);
 885     li->ncg_flex = nflexcon_global;
 886
 887     li->ncg_triangle = 0;
 888     for(mb=0; mb<mtop->nmolblock; mb++)
 889     {
 890         molt = &mtop->moltype[mtop->molblock[mb].type];
 891         li->ncg_triangle +=
 892             mtop->molblock[mb].nmol*
 893             count_triangle_constraints(molt->ilist,
 894                                        &at2con[mtop->molblock[mb].type]);
 895     }
 896
 897     li->nIter  = nIter;
 898     li->nOrder = nProjOrder;
 899
 900     /* LINCS can run on any number of threads.
 901      * Currently the number is fixed for the whole simulation,
 902      * but it could be set in set_lincs().
 903      */
 904     li->nth = gmx_omp_nthreads_get(emntLINCS);
 905     if (li->nth == 1)
 906     {
 907         snew(li->th,1);
 908     }
 909     else
 910     {
 911         /* Allocate an extra elements for "thread-overlap" constraints */
 912         snew(li->th,li->nth+1);
 913     }
 914     if (debug)
 915     {
 916         fprintf(debug,"LINCS: using %d threads\n",li->nth);
 917     }
 918
 919     if (bPLINCS || li->ncg_triangle > 0)
 920     {
 921         please_cite(fplog,"Hess2008a");
 922     }
 923     else
 924     {
 925         please_cite(fplog,"Hess97a");
 926     }
 927
 928     if (fplog)
 929     {
 930         fprintf(fplog,"The number of constraints is %d\n",li->ncg);
 931         if (bPLINCS)
 932         {
 933             fprintf(fplog,"There are inter charge-group constraints,\n"
 934                     "will communicate selected coordinates each lincs iteration\n");
 935         }
 936         if (li->ncg_triangle > 0)
 937         {
 938             fprintf(fplog,
 939                     "%d constraints are involved in constraint triangles,\n"
 940                     "will apply an additional matrix expansion of order %d for couplings\n"
 941                     "between constraints inside triangles\n",
 942                     li->ncg_triangle,li->nOrder);
 943         }
 944     }
 945
 946     return li;
 947 }
 948
 949 /* Sets up the work division over the threads */
 950 static void lincs_thread_setup(struct gmx_lincsdata *li,int natoms)
 951 {
 952     lincs_thread_t *li_m;
 953     int th;
 954     unsigned *atf;
 955     int a;
 956
 957     if (natoms > li->atf_nalloc)
 958     {
 959         li->atf_nalloc = over_alloc_large(natoms);
 960         srenew(li->atf,li->atf_nalloc);
 961     }
 962
 963     atf = li->atf;
 964     /* Clear the atom flags */
 965     for(a=0; a<natoms; a++)
 966     {
 967         atf[a] = 0;
 968     }
 969
 970     for(th=0; th<li->nth; th++)
 971     {
 972         lincs_thread_t *li_th;
 973         int b;
 974
 975         li_th = &li->th[th];
 976
 977         /* The constraints are divided equally over the threads */
 978         li_th->b0 = (li->nc* th   )/li->nth;
 979         li_th->b1 = (li->nc*(th+1))/li->nth;
 980
 981         if (th < sizeof(*atf)*8)
 982         {
 983             /* For each atom set a flag for constraints from each */
 984             for(b=li_th->b0; b<li_th->b1; b++)
 985             {
 986                 atf[li->bla[b*2]  ] |= (1U<<th);
 987                 atf[li->bla[b*2+1]] |= (1U<<th);
 988             }
 989         }
 990     }
 991
 992 #pragma omp parallel for num_threads(li->nth) schedule(static)
 993     for(th=0; th<li->nth; th++)
 994     {
 995         lincs_thread_t *li_th;
 996         unsigned mask;
 997         int b;
 998
 999         li_th = &li->th[th];
1000
1001         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1002         {
1003             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1004             srenew(li_th->ind,li_th->ind_nalloc);
1005             srenew(li_th->ind_r,li_th->ind_nalloc);
1006         }
1007
1008         if (th < sizeof(*atf)*8)
1009         {
1010             mask = (1U<<th) - 1U;
1011
1012             li_th->nind   = 0;
1013             li_th->nind_r = 0;
1014             for(b=li_th->b0; b<li_th->b1; b++)
1015             {
1016                 /* We let the constraint with the lowest thread index
1017                  * operate on atoms with constraints from multiple threads.
1018                  */
1019                 if (((atf[li->bla[b*2]]   & mask) == 0) &&
1020                     ((atf[li->bla[b*2+1]] & mask) == 0))
1021                 {
1022                     /* Add the constraint to the local atom update index */
1023                     li_th->ind[li_th->nind++] = b;
1024                 }
1025                 else
1026                 {
1027                     /* Add the constraint to the rest block */
1028                     li_th->ind_r[li_th->nind_r++] = b;
1029                 }
1030             }
1031         }
1032         else
1033         {
1034             /* We are out of bits, assign all constraints to rest */
1035             for(b=li_th->b0; b<li_th->b1; b++)
1036             {
1037                 li_th->ind_r[li_th->nind_r++] = b;
1038             }
1039         }
1040     }
1041
1042     /* We need to copy all constraints which have not be assigned
1043      * to a thread to a separate list which will be handled by one thread.
1044      */
1045     li_m = &li->th[li->nth];
1046
1047     li_m->nind = 0;
1048     for(th=0; th<li->nth; th++)
1049     {
1050         lincs_thread_t *li_th;
1051         int b;
1052
1053         li_th   = &li->th[th];
1054
1055         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1056         {
1057             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1058             srenew(li_m->ind,li_m->ind_nalloc);
1059         }
1060
1061         for(b=0; b<li_th->nind_r; b++)
1062         {
1063             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1064         }
1065
1066         if (debug)
1067         {
1068             fprintf(debug,"LINCS thread %d: %d constraints\n",
1069                     th,li_th->nind);
1070         }
1071     }
1072
1073     if (debug)
1074     {
1075         fprintf(debug,"LINCS thread r: %d constraints\n",
1076                 li_m->nind);
1077     }
1078 }
1079
1080
1081 void set_lincs(t_idef *idef,t_mdatoms *md,
1082                gmx_bool bDynamics,t_commrec *cr,
1083                struct gmx_lincsdata *li)
1084 {
1085     int      start,natoms,nflexcon;
1086     t_blocka at2con;
1087     t_iatom  *iatom;
1088     int      i,k,ncc_alloc,ni,con,nconnect,concon;
1089     int      type,a1,a2;
1090     real     lenA=0,lenB;
1091     gmx_bool     bLocal;
1092
1093     li->nc = 0;
1094     li->ncc = 0;
1095     /* Zero the thread index ranges.
1096      * Otherwise without local constraints we could return with old ranges.
1097      */
1098     for(i=0; i<li->nth; i++)
1099     {
1100         li->th[i].b0   = 0;
1101         li->th[i].b1   = 0;
1102         li->th[i].nind = 0;
1103     }
1104     if (li->nth > 1)
1105     {
1106         li->th[li->nth].nind = 0;
1107     }
1108
1109     /* This is the local topology, so there are only F_CONSTR constraints */
1110     if (idef->il[F_CONSTR].nr == 0)
1111     {
1112         /* There are no constraints,
1113          * we do not need to fill any data structures.
1114          */
1115         return;
1116     }
1117
1118     if (debug)
1119     {
1120         fprintf(debug,"Building the LINCS connectivity\n");
1121     }
1122
1123     if (DOMAINDECOMP(cr))
1124     {
1125         if (cr->dd->constraints)
1126         {
1127             dd_get_constraint_range(cr->dd,&start,&natoms);
1128         }
1129         else
1130         {
1131             natoms = cr->dd->nat_home;
1132         }
1133         start = 0;
1134     }
1135     else if(PARTDECOMP(cr))
1136         {
1137                 pd_get_constraint_range(cr->pd,&start,&natoms);
1138         }
1139         else
1140     {
1141         start  = md->start;
1142         natoms = md->homenr;
1143     }
1144     at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics,
1145                          &nflexcon);
1146
1147
1148     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1149     {
1150         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1151         srenew(li->bllen0,li->nc_alloc);
1152         srenew(li->ddist,li->nc_alloc);
1153         srenew(li->bla,2*li->nc_alloc);
1154         srenew(li->blc,li->nc_alloc);
1155         srenew(li->blc1,li->nc_alloc);
1156         srenew(li->blnr,li->nc_alloc+1);
1157         srenew(li->bllen,li->nc_alloc);
1158         srenew(li->tmpv,li->nc_alloc);
1159         srenew(li->tmp1,li->nc_alloc);
1160         srenew(li->tmp2,li->nc_alloc);
1161         srenew(li->tmp3,li->nc_alloc);
1162         srenew(li->tmp4,li->nc_alloc);
1163         srenew(li->mlambda,li->nc_alloc);
1164         if (li->ncg_triangle > 0)
1165         {
1166             /* This is allocating too much, but it is difficult to improve */
1167             srenew(li->triangle,li->nc_alloc);
1168             srenew(li->tri_bits,li->nc_alloc);
1169         }
1170     }
1171
1172     iatom = idef->il[F_CONSTR].iatoms;
1173
1174     ncc_alloc = li->ncc_alloc;
1175     li->blnr[0] = 0;
1176
1177     ni = idef->il[F_CONSTR].nr/3;
1178
1179     con = 0;
1180     nconnect = 0;
1181     li->blnr[con] = nconnect;
1182     for(i=0; i<ni; i++)
1183     {
1184         bLocal = TRUE;
1185         type = iatom[3*i];
1186         a1   = iatom[3*i+1];
1187         a2   = iatom[3*i+2];
1188         lenA = idef->iparams[type].constr.dA;
1189         lenB = idef->iparams[type].constr.dB;
1190         /* Skip the flexible constraints when not doing dynamics */
1191         if (bDynamics || lenA!=0 || lenB!=0)
1192         {
1193             li->bllen0[con]  = lenA;
1194             li->ddist[con]   = lenB - lenA;
1195             /* Set the length to the topology A length */
1196             li->bllen[con]   = li->bllen0[con];
1197             li->bla[2*con]   = a1;
1198             li->bla[2*con+1] = a2;
1199             /* Construct the constraint connection matrix blbnb */
1200             for(k=at2con.index[a1-start]; k<at2con.index[a1-start+1]; k++)
1201             {
1202                 concon = at2con.a[k];
1203                 if (concon != i)
1204                 {
1205                     if (nconnect >= ncc_alloc)
1206                     {
1207                         ncc_alloc = over_alloc_small(nconnect+1);
1208                         srenew(li->blbnb,ncc_alloc);
1209                     }
1210                     li->blbnb[nconnect++] = concon;
1211                 }
1212             }
1213             for(k=at2con.index[a2-start]; k<at2con.index[a2-start+1]; k++)
1214             {
1215                 concon = at2con.a[k];
1216                 if (concon != i)
1217                 {
1218                     if (nconnect+1 > ncc_alloc)
1219                     {
1220                         ncc_alloc = over_alloc_small(nconnect+1);
1221                         srenew(li->blbnb,ncc_alloc);
1222                     }
1223                     li->blbnb[nconnect++] = concon;
1224                 }
1225             }
1226             li->blnr[con+1] = nconnect;
1227
1228             if (cr->dd == NULL)
1229             {
1230                 /* Order the blbnb matrix to optimize memory access */
1231                 qsort(&(li->blbnb[li->blnr[con]]),li->blnr[con+1]-li->blnr[con],
1232                       sizeof(li->blbnb[0]),int_comp);
1233             }
1234             /* Increase the constraint count */
1235             con++;
1236         }
1237     }
1238
1239     done_blocka(&at2con);
1240
1241     /* This is the real number of constraints,
1242      * without dynamics the flexible constraints are not present.
1243      */
1244     li->nc = con;
1245
1246     li->ncc = li->blnr[con];
1247     if (cr->dd == NULL)
1248     {
1249         /* Since the matrix is static, we can free some memory */
1250         ncc_alloc = li->ncc;
1251         srenew(li->blbnb,ncc_alloc);
1252     }
1253
1254     if (ncc_alloc > li->ncc_alloc)
1255     {
1256         li->ncc_alloc = ncc_alloc;
1257         srenew(li->blmf,li->ncc_alloc);
1258         srenew(li->blmf1,li->ncc_alloc);
1259         srenew(li->tmpncc,li->ncc_alloc);
1260     }
1261
1262     if (debug)
1263     {
1264         fprintf(debug,"Number of constraints is %d, couplings %d\n",
1265                 li->nc,li->ncc);
1266     }
1267
1268     if (li->nth == 1)
1269     {
1270         li->th[0].b0 = 0;
1271         li->th[0].b1 = li->nc;
1272     }
1273     else
1274     {
1275         lincs_thread_setup(li,md->nr);
1276     }
1277
1278     set_lincs_matrix(li,md->invmass,md->lambda);
1279 }
1280
1281 static void lincs_warning(FILE *fplog,
1282                           gmx_domdec_t *dd,rvec *x,rvec *xprime,t_pbc *pbc,
1283                           int ncons,int *bla,real *bllen,real wangle,
1284                           int maxwarn,int *warncount)
1285 {
1286     int b,i,j;
1287     rvec v0,v1;
1288     real wfac,d0,d1,cosine;
1289     char buf[STRLEN];
1290
1291     wfac=cos(DEG2RAD*wangle);
1292
1293     sprintf(buf,"bonds that rotated more than %g degrees:\n"
1294             " atom 1 atom 2  angle  previous, current, constraint length\n",
1295             wangle);
1296     fprintf(stderr,"%s",buf);
1297     if (fplog)
1298     {
1299         fprintf(fplog,"%s",buf);
1300     }
1301
1302     for(b=0;b<ncons;b++)
1303     {
1304         i = bla[2*b];
1305         j = bla[2*b+1];
1306         if (pbc)
1307         {
1308             pbc_dx_aiuc(pbc,x[i],x[j],v0);
1309             pbc_dx_aiuc(pbc,xprime[i],xprime[j],v1);
1310         }
1311         else
1312         {
1313             rvec_sub(x[i],x[j],v0);
1314             rvec_sub(xprime[i],xprime[j],v1);
1315         }
1316         d0 = norm(v0);
1317         d1 = norm(v1);
1318         cosine = iprod(v0,v1)/(d0*d1);
1319         if (cosine < wfac)
1320         {
1321             sprintf(buf," %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1322                     ddglatnr(dd,i),ddglatnr(dd,j),
1323                     RAD2DEG*acos(cosine),d0,d1,bllen[b]);
1324             fprintf(stderr,"%s",buf);
1325             if (fplog)
1326             {
1327                 fprintf(fplog,"%s",buf);
1328             }
1329             if (!gmx_isfinite(d1))
1330             {
1331                 gmx_fatal(FARGS,"Bond length not finite.");
1332             }
1333
1334             (*warncount)++;
1335         }
1336     }
1337     if (*warncount > maxwarn)
1338     {
1339         too_many_constraint_warnings(econtLINCS,*warncount);
1340     }
1341 }
1342
1343 static void cconerr(gmx_domdec_t *dd,
1344                     int ncons,int *bla,real *bllen,rvec *x,t_pbc *pbc,
1345                     real *ncons_loc,real *ssd,real *max,int *imax)
1346 {
1347     real      len,d,ma,ssd2,r2;
1348     int       *nlocat,count,b,im;
1349     rvec      dx;
1350
1351     if (dd && dd->constraints)
1352     {
1353         nlocat = dd_constraints_nlocalatoms(dd);
1354     }
1355     else
1356     {
1357         nlocat = 0;
1358     }
1359
1360     ma = 0;
1361     ssd2 = 0;
1362     im = 0;
1363     count = 0;
1364     for(b=0;b<ncons;b++)
1365     {
1366         if (pbc)
1367         {
1368             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
1369         }
1370         else {
1371             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
1372         }
1373         r2 = norm2(dx);
1374         len = r2*gmx_invsqrt(r2);
1375         d = fabs(len/bllen[b]-1);
1376         if (d > ma && (nlocat==NULL || nlocat[b]))
1377         {
1378             ma = d;
1379             im = b;
1380         }
1381         if (nlocat == NULL)
1382         {
1383             ssd2 += d*d;
1384             count++;
1385         }
1386         else
1387         {
1388             ssd2 += nlocat[b]*d*d;
1389             count += nlocat[b];
1390         }
1391     }
1392
1393     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1394     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1395     *max = ma;
1396     *imax = im;
1397 }
1398
1399 static void dump_conf(gmx_domdec_t *dd,struct gmx_lincsdata *li,
1400                       t_blocka *at2con,
1401                       char *name,gmx_bool bAll,rvec *x,matrix box)
1402 {
1403     char str[STRLEN];
1404     FILE *fp;
1405     int  ac0,ac1,i;
1406
1407     dd_get_constraint_range(dd,&ac0,&ac1);
1408
1409     sprintf(str,"%s_%d_%d_%d.pdb",name,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
1410     fp = gmx_fio_fopen(str,"w");
1411     fprintf(fp,"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1\n",
1412             10*norm(box[XX]),10*norm(box[YY]),10*norm(box[ZZ]),
1413             90.0,90.0,90.0);
1414     for(i=0; i<ac1; i++)
1415     {
1416         if (i < dd->nat_home || (bAll && i >= ac0 && i < ac1))
1417         {
1418             fprintf(fp,"%-6s%5u  %-4.4s%3.3s %c%4d    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
1419                     "ATOM",ddglatnr(dd,i),"C","ALA",' ',i+1,
1420                     10*x[i][XX],10*x[i][YY],10*x[i][ZZ],
1421                     1.0,i<dd->nat_tot ? 0.0 : 1.0);
1422         }
1423     }
1424     if (bAll)
1425     {
1426         for(i=0; i<li->nc; i++)
1427         {
1428             fprintf(fp,"CONECT%5d%5d\n",
1429                     ddglatnr(dd,li->bla[2*i]),
1430                     ddglatnr(dd,li->bla[2*i+1]));
1431         }
1432     }
1433     gmx_fio_fclose(fp);
1434 }
1435
1436 gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
1437                          t_inputrec *ir,
1438                          gmx_large_int_t step,
1439                          struct gmx_lincsdata *lincsd,t_mdatoms *md,
1440                          t_commrec *cr,
1441                          rvec *x,rvec *xprime,rvec *min_proj,
1442                          matrix box,t_pbc *pbc,
1443                          real lambda,real *dvdlambda,
1444                          real invdt,rvec *v,
1445                          gmx_bool bCalcVir,tensor rmdr,
1446                          int econq,
1447                          t_nrnb *nrnb,
1448                          int maxwarn,int *warncount)
1449 {
1450     char  buf[STRLEN],buf2[22],buf3[STRLEN];
1451     int   i,warn=0,p_imax,error;
1452     real  ncons_loc,p_ssd,p_max=0;
1453     rvec  dx;
1454     gmx_bool  bOK;
1455
1456     bOK = TRUE;
1457
1458     if (lincsd->nc == 0 && cr->dd == NULL)
1459     {
1460         if (bLog || bEner)
1461         {
1462             lincsd->rmsd_data[0] = 0;
1463             if (ir->eI == eiSD2 && v == NULL)
1464             {
1465                 i = 2;
1466             }
1467             else
1468             {
1469                 i = 1;
1470             }
1471             lincsd->rmsd_data[i] = 0;
1472         }
1473
1474         return bOK;
1475     }
1476
1477     if (econq == econqCoord)
1478     {
1479         if (ir->efep != efepNO)
1480         {
1481             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1482             {
1483                 set_lincs_matrix(lincsd,md->invmass,md->lambda);
1484             }
1485
1486             for(i=0; i<lincsd->nc; i++)
1487             {
1488                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1489             }
1490         }
1491
1492         if (lincsd->ncg_flex)
1493         {
1494             /* Set the flexible constraint lengths to the old lengths */
1495             if (pbc != NULL)
1496             {
1497                 for(i=0; i<lincsd->nc; i++)
1498                 {
1499                     if (lincsd->bllen[i] == 0) {
1500                         pbc_dx_aiuc(pbc,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx);
1501                         lincsd->bllen[i] = norm(dx);
1502                     }
1503                 }
1504             }
1505             else
1506             {
1507                 for(i=0; i<lincsd->nc; i++)
1508                 {
1509                     if (lincsd->bllen[i] == 0)
1510                     {
1511                         lincsd->bllen[i] =
1512                             sqrt(distance2(x[lincsd->bla[2*i]],
1513                                            x[lincsd->bla[2*i+1]]));
1514                     }
1515                 }
1516             }
1517         }
1518
1519         if (bLog && fplog)
1520         {
1521             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1522                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1523         }
1524
1525         /* The (only) OpenMP parallel region of constrain_lincs */
1526 #pragma omp parallel num_threads(lincsd->nth)
1527         {
1528             int th=gmx_omp_get_thread_num();
1529             do_lincs(x,xprime,box,pbc,lincsd,th,
1530                      md->invmass,cr,
1531                      bCalcVir || (ir->efep != efepNO),
1532                      ir->LincsWarnAngle,&warn,
1533                      invdt,v,bCalcVir,rmdr);
1534         }
1535
1536         if (ir->efep != efepNO)
1537         {
1538             real dt_2,dvdl=0;
1539
1540             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
1541             for(i=0; (i<lincsd->nc); i++)
1542             {
1543                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
1544             }
1545             *dvdlambda += dvdl;
1546                 }
1547
1548         if (bLog && fplog && lincsd->nc > 0)
1549         {
1550             fprintf(fplog,"   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1551             fprintf(fplog,"       Before LINCS          %.6f    %.6f %6d %6d\n",
1552                     sqrt(p_ssd/ncons_loc),p_max,
1553                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1554                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1555         }
1556         if (bLog || bEner)
1557         {
1558             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1559                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1560             /* Check if we are doing the second part of SD */
1561             if (ir->eI == eiSD2 && v == NULL)
1562             {
1563                 i = 2;
1564             }
1565             else
1566             {
1567                 i = 1;
1568             }
1569             lincsd->rmsd_data[0] = ncons_loc;
1570             lincsd->rmsd_data[i] = p_ssd;
1571         }
1572         else
1573         {
1574             lincsd->rmsd_data[0] = 0;
1575             lincsd->rmsd_data[1] = 0;
1576             lincsd->rmsd_data[2] = 0;
1577         }
1578         if (bLog && fplog && lincsd->nc > 0)
1579         {
1580             fprintf(fplog,
1581                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1582                     sqrt(p_ssd/ncons_loc),p_max,
1583                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1584                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1585         }
1586
1587         if (warn > 0)
1588         {
1589             if (maxwarn >= 0)
1590             {
1591                 cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1592                         &ncons_loc,&p_ssd,&p_max,&p_imax);
1593                 if (MULTISIM(cr))
1594                 {
1595                     sprintf(buf3," in simulation %d", cr->ms->sim);
1596                 }
1597                 else
1598                 {
1599                     buf3[0] = 0;
1600                 }
1601                 sprintf(buf,"\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1602                         "relative constraint deviation after LINCS:\n"
1603                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1604                         gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t,
1605                         buf3,
1606                         sqrt(p_ssd/ncons_loc),p_max,
1607                         ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1608                         ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1609                 if (fplog)
1610                 {
1611                     fprintf(fplog,"%s",buf);
1612                 }
1613                 fprintf(stderr,"%s",buf);
1614                 lincs_warning(fplog,cr->dd,x,xprime,pbc,
1615                               lincsd->nc,lincsd->bla,lincsd->bllen,
1616                               ir->LincsWarnAngle,maxwarn,warncount);
1617             }
1618             bOK = (p_max < 0.5);
1619         }
1620
1621         if (lincsd->ncg_flex) {
1622             for(i=0; (i<lincsd->nc); i++)
1623                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1624                     lincsd->bllen[i] = 0;
1625         }
1626     }
1627     else
1628     {
1629         do_lincsp(x,xprime,min_proj,pbc,lincsd,md->invmass,econq,dvdlambda,
1630                   bCalcVir,rmdr);
1631     }
1632
1633     /* count assuming nit=1 */
1634     inc_nrnb(nrnb,eNR_LINCS,lincsd->nc);
1635     inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc);
1636     if (lincsd->ntriangle > 0)
1637     {
1638         inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle);
1639     }
1640     if (v)
1641     {
1642         inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2);
1643     }
1644     if (bCalcVir)
1645     {
1646         inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc);
1647     }
1648
1649     return bOK;
1650 }