src/mdlib/clincs.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /* This file is completely threadsafe - keep it that way! */
  39 #ifdef HAVE_CONFIG_H
  40 #include <config.h>
  41 #endif
  42
  43 #include <math.h>
  44 #include "main.h"
  45 #include "constr.h"
  46 #include "copyrite.h"
  47 #include "physics.h"
  48 #include "vec.h"
  49 #include "pbc.h"
  50 #include "smalloc.h"
  51 #include "mdrun.h"
  52 #include "nrnb.h"
  53 #include "domdec.h"
  54 #include "partdec.h"
  55 #include "mtop_util.h"
  56 #include "gmxfio.h"
  57 #include "gmx_omp_nthreads.h"
  58 #include "gmx_omp.h"
  59
  60 typedef struct {
  61     int b0;           /* first constraint for this thread */
  62     int b1;           /* b1-1 is the last constraint for this thread */
  63     int nind;         /* number of indices */
  64     int *ind;         /* constraint index for updating atom data */
  65     int nind_r;       /* number of indices */
  66     int *ind_r;       /* constraint index for updating atom data */
  67     int ind_nalloc;   /* allocation size of ind and ind_r */
  68     tensor vir_r_m_dr;/* temporary variable for virial calculation */
  69 } lincs_thread_t;
  70
  71 typedef struct gmx_lincsdata {
  72     int  ncg;         /* the global number of constraints */
  73     int  ncg_flex;    /* the global number of flexible constraints */
  74     int  ncg_triangle;/* the global number of constraints in triangles */
  75     int  nIter;       /* the number of iterations */
  76     int  nOrder;      /* the order of the matrix expansion */
  77     int  nc;          /* the number of constraints */
  78     int  nc_alloc;    /* the number we allocated memory for */
  79     int  ncc;         /* the number of constraint connections */
  80     int  ncc_alloc;   /* the number we allocated memory for */
  81     real matlam;      /* the FE lambda value used for filling blc and blmf */
  82     real *bllen0;     /* the reference distance in topology A */
  83     real *ddist;      /* the reference distance in top B - the r.d. in top A */
  84     int  *bla;        /* the atom pairs involved in the constraints */
  85     real *blc;        /* 1/sqrt(invmass1 + invmass2) */
  86     real *blc1;       /* as blc, but with all masses 1 */
  87     int  *blnr;       /* index into blbnb and blmf */
  88     int  *blbnb;      /* list of constraint connections */
  89     int  ntriangle;   /* the local number of constraints in triangles */
  90     int  *triangle;   /* the list of triangle constraints */
  91     int  *tri_bits;   /* the bits tell if the matrix element should be used */
  92     int  ncc_triangle;/* the number of constraint connections in triangles */
  93     gmx_bool bCommIter; /* communicate before each LINCS interation */
  94     real *blmf;       /* matrix of mass factors for constraint connections */
  95     real *blmf1;      /* as blmf, but with all masses 1 */
  96     real *bllen;      /* the reference bond length */
  97     int  nth;         /* The number of threads doing LINCS */
  98     lincs_thread_t *th; /* LINCS thread division */
  99     unsigned *atf;    /* atom flags for thread parallelization */
 100     int  atf_nalloc;  /* allocation size of atf */
 101     /* arrays for temporary storage in the LINCS algorithm */
 102     rvec *tmpv;
 103     real *tmpncc;
 104     real *tmp1;
 105     real *tmp2;
 106     real *tmp3;
 107     real *tmp4;
 108     real *mlambda;  /* the Lagrange multipliers * -1 */
 109     /* storage for the constraint RMS relative deviation output */
 110     real rmsd_data[3];
 111 } t_gmx_lincsdata;
 112
 113 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 114 {
 115     return lincsd->rmsd_data;
 116 }
 117
 118 real lincs_rmsd(struct gmx_lincsdata *lincsd,gmx_bool bSD2)
 119 {
 120     if (lincsd->rmsd_data[0] > 0)
 121     {
 122         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 123     }
 124     else
 125     {
 126         return 0;
 127     }
 128 }
 129
 130 /* Do a set of nrec LINCS matrix multiplications.
 131  * This function will return with up to date thread-local
 132  * constraint data, without an OpenMP barrier.
 133  */
 134 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 135                                 int b0,int b1,
 136                                 const real *blcc,
 137                                 real *rhs1,real *rhs2,real *sol)
 138 {
 139     int  nrec,rec,b,j,n,nr0,nr1;
 140     real mvb,*swap;
 141     int  ntriangle,tb,bits;
 142     const int *blnr=lincsd->blnr,*blbnb=lincsd->blbnb;
 143     const int *triangle=lincsd->triangle,*tri_bits=lincsd->tri_bits;
 144
 145     ntriangle = lincsd->ntriangle;
 146     nrec      = lincsd->nOrder;
 147
 148     for(rec=0; rec<nrec; rec++)
 149     {
 150 #pragma omp barrier
 151         for(b=b0; b<b1; b++)
 152         {
 153             mvb = 0;
 154             for(n=blnr[b]; n<blnr[b+1]; n++)
 155             {
 156                 j = blbnb[n];
 157                 mvb = mvb + blcc[n]*rhs1[j];
 158             }
 159             rhs2[b] = mvb;
 160             sol[b]  = sol[b] + mvb;
 161         }
 162         swap = rhs1;
 163         rhs1 = rhs2;
 164         rhs2 = swap;
 165     } /* nrec*(ncons+2*nrtot) flops */
 166
 167     if (ntriangle > 0)
 168     {
 169         /* Perform an extra nrec recursions for only the constraints
 170          * involved in rigid triangles.
 171          * In this way their accuracy should come close to those of the other
 172          * constraints, since traingles of constraints can produce eigenvalues
 173          * around 0.7, while the effective eigenvalue for bond constraints
 174          * is around 0.4 (and 0.7*0.7=0.5).
 175          */
 176         /* We need to copy the temporary array, since only the elements
 177          * for constraints involved in triangles are updated and then
 178          * the pointers are swapped. This saving copying the whole arrary.
 179          * We need barrier as other threads might still be reading from rhs2.
 180          */
 181 #pragma omp barrier
 182         for(b=b0; b<b1; b++)
 183         {
 184             rhs2[b] = rhs1[b];
 185         }
 186 #pragma omp barrier
 187 #pragma omp master
 188         {
 189             for(rec=0; rec<nrec; rec++)
 190             {
 191                 for(tb=0; tb<ntriangle; tb++)
 192                 {
 193                     b    = triangle[tb];
 194                     bits = tri_bits[tb];
 195                     mvb = 0;
 196                     nr0 = blnr[b];
 197                     nr1 = blnr[b+1];
 198                     for(n=nr0; n<nr1; n++)
 199                     {
 200                         if (bits & (1<<(n-nr0)))
 201                         {
 202                             j = blbnb[n];
 203                             mvb = mvb + blcc[n]*rhs1[j];
 204                         }
 205                     }
 206                     rhs2[b] = mvb;
 207                     sol[b]  = sol[b] + mvb;
 208                 }
 209                 swap = rhs1;
 210                 rhs1 = rhs2;
 211                 rhs2 = swap;
 212             }
 213         } /* flops count is missing here */
 214
 215         /* We need a barrier here as the calling routine will continue
 216          * to operate on the thread-local constraints without barrier.
 217          */
 218 #pragma omp barrier
 219     }
 220 }
 221
 222 static void lincs_update_atoms_noind(int ncons,const int *bla,
 223                                      real prefac,
 224                                      const real *fac,rvec *r,
 225                                      const real *invmass,
 226                                      rvec *x)
 227 {
 228     int  b,i,j;
 229     real mvb,im1,im2,tmp0,tmp1,tmp2;
 230
 231     if (invmass != NULL)
 232     {
 233         for(b=0; b<ncons; b++)
 234         {
 235             i = bla[2*b];
 236             j = bla[2*b+1];
 237             mvb = prefac*fac[b];
 238             im1 = invmass[i];
 239             im2 = invmass[j];
 240             tmp0 = r[b][0]*mvb;
 241             tmp1 = r[b][1]*mvb;
 242             tmp2 = r[b][2]*mvb;
 243             x[i][0] -= tmp0*im1;
 244             x[i][1] -= tmp1*im1;
 245             x[i][2] -= tmp2*im1;
 246             x[j][0] += tmp0*im2;
 247             x[j][1] += tmp1*im2;
 248             x[j][2] += tmp2*im2;
 249         } /* 16 ncons flops */
 250     }
 251     else
 252     {
 253         for(b=0; b<ncons; b++)
 254         {
 255             i = bla[2*b];
 256             j = bla[2*b+1];
 257             mvb = prefac*fac[b];
 258             tmp0 = r[b][0]*mvb;
 259             tmp1 = r[b][1]*mvb;
 260             tmp2 = r[b][2]*mvb;
 261             x[i][0] -= tmp0;
 262             x[i][1] -= tmp1;
 263             x[i][2] -= tmp2;
 264             x[j][0] += tmp0;
 265             x[j][1] += tmp1;
 266             x[j][2] += tmp2;
 267         }
 268     }
 269 }
 270
 271 static void lincs_update_atoms_ind(int ncons,const int *ind,const int *bla,
 272                                    real prefac,
 273                                    const real *fac,rvec *r,
 274                                    const real *invmass,
 275                                    rvec *x)
 276 {
 277     int  bi,b,i,j;
 278     real mvb,im1,im2,tmp0,tmp1,tmp2;
 279
 280     if (invmass != NULL)
 281     {
 282         for(bi=0; bi<ncons; bi++)
 283         {
 284             b = ind[bi];
 285             i = bla[2*b];
 286             j = bla[2*b+1];
 287             mvb = prefac*fac[b];
 288             im1 = invmass[i];
 289             im2 = invmass[j];
 290             tmp0 = r[b][0]*mvb;
 291             tmp1 = r[b][1]*mvb;
 292             tmp2 = r[b][2]*mvb;
 293             x[i][0] -= tmp0*im1;
 294             x[i][1] -= tmp1*im1;
 295             x[i][2] -= tmp2*im1;
 296             x[j][0] += tmp0*im2;
 297             x[j][1] += tmp1*im2;
 298             x[j][2] += tmp2*im2;
 299         } /* 16 ncons flops */
 300     }
 301     else
 302     {
 303         for(bi=0; bi<ncons; bi++)
 304         {
 305             b = ind[bi];
 306             i = bla[2*b];
 307             j = bla[2*b+1];
 308             mvb = prefac*fac[b];
 309             tmp0 = r[b][0]*mvb;
 310             tmp1 = r[b][1]*mvb;
 311             tmp2 = r[b][2]*mvb;
 312             x[i][0] -= tmp0;
 313             x[i][1] -= tmp1;
 314             x[i][2] -= tmp2;
 315             x[j][0] += tmp0;
 316             x[j][1] += tmp1;
 317             x[j][2] += tmp2;
 318         } /* 16 ncons flops */
 319     }
 320 }
 321
 322 static void lincs_update_atoms(struct gmx_lincsdata *li,int th,
 323                                real prefac,
 324                                const real *fac,rvec *r,
 325                                const real *invmass,
 326                                rvec *x)
 327 {
 328     if (li->nth == 1)
 329     {
 330         /* Single thread, we simply update for all constraints */
 331         lincs_update_atoms_noind(li->nc,li->bla,prefac,fac,r,invmass,x);
 332     }
 333     else
 334     {
 335         /* Update the atom vector components for our thread local
 336          * constraints that only access our local atom range.
 337          * This can be done without a barrier.
 338          */
 339         lincs_update_atoms_ind(li->th[th].nind,li->th[th].ind,
 340                                li->bla,prefac,fac,r,invmass,x);
 341
 342         if (li->th[li->nth].nind > 0)
 343         {
 344             /* Update the constraints that operate on atoms
 345              * in multiple thread atom blocks on the master thread.
 346              */
 347 #pragma omp barrier
 348 #pragma omp master
 349             {
 350                 lincs_update_atoms_ind(li->th[li->nth].nind,
 351                                        li->th[li->nth].ind,
 352                                        li->bla,prefac,fac,r,invmass,x);
 353             }
 354         }
 355     }
 356 }
 357
 358 /* LINCS projection, works on derivatives of the coordinates */
 359 static void do_lincsp(rvec *x,rvec *f,rvec *fp,t_pbc *pbc,
 360                       struct gmx_lincsdata *lincsd,int th,
 361                       real *invmass,
 362                       int econq,real *dvdlambda,
 363                       gmx_bool bCalcVir,tensor rmdf)
 364 {
 365     int     b0,b1,b,i,j,k,n;
 366     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,wfac,lam;
 367     rvec    dx;
 368     int     *bla,*blnr,*blbnb;
 369     rvec    *r;
 370     real    *blc,*blmf,*blcc,*rhs1,*rhs2,*sol;
 371
 372     b0 = lincsd->th[th].b0;
 373     b1 = lincsd->th[th].b1;
 374
 375     bla    = lincsd->bla;
 376     r      = lincsd->tmpv;
 377     blnr   = lincsd->blnr;
 378     blbnb  = lincsd->blbnb;
 379     if (econq != econqForce)
 380     {
 381         /* Use mass-weighted parameters */
 382         blc  = lincsd->blc;
 383         blmf = lincsd->blmf;
 384     }
 385     else
 386     {
 387         /* Use non mass-weighted parameters */
 388         blc  = lincsd->blc1;
 389         blmf = lincsd->blmf1;
 390     }
 391     blcc   = lincsd->tmpncc;
 392     rhs1   = lincsd->tmp1;
 393     rhs2   = lincsd->tmp2;
 394     sol    = lincsd->tmp3;
 395
 396     /* Compute normalized i-j vectors */
 397     if (pbc)
 398     {
 399         for(b=b0; b<b1; b++)
 400         {
 401             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 402             unitv(dx,r[b]);
 403         }
 404     }
 405     else
 406     {
 407         for(b=b0; b<b1; b++)
 408         {
 409             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
 410             unitv(dx,r[b]);
 411         } /* 16 ncons flops */
 412     }
 413
 414 #pragma omp barrier
 415     for(b=b0; b<b1; b++)
 416     {
 417         tmp0 = r[b][0];
 418         tmp1 = r[b][1];
 419         tmp2 = r[b][2];
 420         i = bla[2*b];
 421         j = bla[2*b+1];
 422         for(n=blnr[b]; n<blnr[b+1]; n++)
 423         {
 424             k = blbnb[n];
 425             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 426         } /* 6 nr flops */
 427         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 428                       tmp1*(f[i][1] - f[j][1]) +
 429                       tmp2*(f[i][2] - f[j][2]));
 430         rhs1[b] = mvb;
 431         sol[b]  = mvb;
 432         /* 7 flops */
 433     }
 434     /* Together: 23*ncons + 6*nrtot flops */
 435
 436     lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 437     /* nrec*(ncons+2*nrtot) flops */
 438
 439     if (econq == econqDeriv_FlexCon)
 440     {
 441         /* We only want to constraint the flexible constraints,
 442          * so we mask out the normal ones by setting sol to 0.
 443          */
 444         for(b=b0; b<b1; b++)
 445         {
 446             if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 447             {
 448                 sol[b] = 0;
 449             }
 450         }
 451     }
 452
 453     /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
 454     for(b=b0; b<b1; b++)
 455     {
 456         sol[b] *= blc[b];
 457     }
 458
 459     /* When constraining forces, we should not use mass weighting,
 460      * so we pass invmass=NULL, which results in the use of 1 for all atoms.
 461      */
 462     lincs_update_atoms(lincsd,th,1.0,sol,r,
 463                        (econq != econqForce) ? invmass : NULL,fp);
 464
 465     if (dvdlambda != NULL)
 466     {
 467 #pragma omp barrier
 468         for(b=b0; b<b1; b++)
 469         {
 470             *dvdlambda -= sol[b]*lincsd->ddist[b];
 471         }
 472         /* 10 ncons flops */
 473     }
 474
 475     if (bCalcVir)
 476     {
 477         /* Constraint virial,
 478          * determines sum r_bond x delta f,
 479          * where delta f is the constraint correction
 480          * of the quantity that is being constrained.
 481          */
 482         for(b=b0; b<b1; b++)
 483         {
 484             mvb = lincsd->bllen[b]*sol[b];
 485             for(i=0; i<DIM; i++)
 486             {
 487                 tmp1 = mvb*r[b][i];
 488                 for(j=0; j<DIM; j++)
 489                 {
 490                     rmdf[i][j] += tmp1*r[b][j];
 491                 }
 492             }
 493         } /* 23 ncons flops */
 494     }
 495 }
 496
 497 static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc,
 498                      struct gmx_lincsdata *lincsd,int th,
 499                      real *invmass,
 500                                          t_commrec *cr,
 501                      gmx_bool bCalcLambda,
 502                      real wangle,int *warn,
 503                      real invdt,rvec *v,
 504                      gmx_bool bCalcVir,tensor vir_r_m_dr)
 505 {
 506     int     b0,b1,b,i,j,k,n,iter;
 507     real    tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac;
 508     rvec    dx;
 509     int     *bla,*blnr,*blbnb;
 510     rvec    *r;
 511     real    *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*blc_sol,*mlambda;
 512     int     *nlocat;
 513
 514     b0 = lincsd->th[th].b0;
 515     b1 = lincsd->th[th].b1;
 516
 517     bla    = lincsd->bla;
 518     r      = lincsd->tmpv;
 519     blnr   = lincsd->blnr;
 520     blbnb  = lincsd->blbnb;
 521     blc    = lincsd->blc;
 522     blmf   = lincsd->blmf;
 523     bllen  = lincsd->bllen;
 524     blcc   = lincsd->tmpncc;
 525     rhs1   = lincsd->tmp1;
 526     rhs2   = lincsd->tmp2;
 527     sol    = lincsd->tmp3;
 528     blc_sol= lincsd->tmp4;
 529     mlambda= lincsd->mlambda;
 530
 531     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 532     {
 533         nlocat = dd_constraints_nlocalatoms(cr->dd);
 534     }
 535     else if (PARTDECOMP(cr))
 536     {
 537         nlocat = pd_constraints_nlocalatoms(cr->pd);
 538     }
 539     else
 540     {
 541         nlocat = NULL;
 542     }
 543
 544     if (pbc)
 545     {
 546         /* Compute normalized i-j vectors */
 547         for(b=b0; b<b1; b++)
 548         {
 549             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
 550             unitv(dx,r[b]);
 551         }
 552 #pragma omp barrier
 553         for(b=b0; b<b1; b++)
 554         {
 555             for(n=blnr[b]; n<blnr[b+1]; n++)
 556             {
 557                 blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]);
 558             }
 559             pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 560             mvb = blc[b]*(iprod(r[b],dx) - bllen[b]);
 561             rhs1[b] = mvb;
 562             sol[b]  = mvb;
 563         }
 564     }
 565     else
 566     {
 567         /* Compute normalized i-j vectors */
 568         for(b=b0; b<b1; b++)
 569         {
 570             i = bla[2*b];
 571             j = bla[2*b+1];
 572             tmp0 = x[i][0] - x[j][0];
 573             tmp1 = x[i][1] - x[j][1];
 574             tmp2 = x[i][2] - x[j][2];
 575             rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 576             r[b][0] = rlen*tmp0;
 577             r[b][1] = rlen*tmp1;
 578             r[b][2] = rlen*tmp2;
 579         } /* 16 ncons flops */
 580
 581 #pragma omp barrier
 582         for(b=b0; b<b1; b++)
 583         {
 584             tmp0 = r[b][0];
 585             tmp1 = r[b][1];
 586             tmp2 = r[b][2];
 587             len = bllen[b];
 588             i = bla[2*b];
 589             j = bla[2*b+1];
 590             for(n=blnr[b]; n<blnr[b+1]; n++)
 591             {
 592                 k = blbnb[n];
 593                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 594             } /* 6 nr flops */
 595             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 596                           tmp1*(xp[i][1] - xp[j][1]) +
 597                           tmp2*(xp[i][2] - xp[j][2]) - len);
 598             rhs1[b] = mvb;
 599             sol[b]  = mvb;
 600             /* 10 flops */
 601         }
 602         /* Together: 26*ncons + 6*nrtot flops */
 603     }
 604
 605     lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 606     /* nrec*(ncons+2*nrtot) flops */
 607
 608     for(b=b0; b<b1; b++)
 609     {
 610         mlambda[b] = blc[b]*sol[b];
 611     }
 612
 613     /* Update the coordinates */
 614     lincs_update_atoms(lincsd,th,1.0,mlambda,r,invmass,xp);
 615
 616     /*
 617      ********  Correction for centripetal effects  ********
 618      */
 619
 620     wfac = cos(DEG2RAD*wangle);
 621     wfac = wfac*wfac;
 622
 623     for(iter=0; iter<lincsd->nIter; iter++)
 624     {
 625         if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints) ||
 626             PARTDECOMP(cr))
 627         {
 628 #pragma omp barrier
 629 #pragma omp master
 630             {
 631                  /* Communicate the corrected non-local coordinates */
 632                 if (DOMAINDECOMP(cr))
 633                 {
 634                     dd_move_x_constraints(cr->dd,box,xp,NULL);
 635                 }
 636                 else
 637                 {
 638                     pd_move_x_constraints(cr,xp,NULL);
 639                 }
 640             }
 641         }
 642
 643 #pragma omp barrier
 644         for(b=b0; b<b1; b++)
 645         {
 646             len = bllen[b];
 647             if (pbc)
 648             {
 649                 pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
 650             }
 651             else
 652             {
 653                 rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx);
 654             }
 655             len2 = len*len;
 656             dlen2 = 2*len2 - norm2(dx);
 657             if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b]))
 658             {
 659                 *warn = b;
 660             }
 661             if (dlen2 > 0)
 662             {
 663                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 664             }
 665             else
 666             {
 667                 mvb = blc[b]*len;
 668             }
 669             rhs1[b] = mvb;
 670             sol[b]  = mvb;
 671         } /* 20*ncons flops */
 672
 673         lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
 674         /* nrec*(ncons+2*nrtot) flops */
 675
 676         for(b=b0; b<b1; b++)
 677         {
 678             mvb = blc[b]*sol[b];
 679             blc_sol[b]  = mvb;
 680             mlambda[b] += mvb;
 681         }
 682
 683         /* Update the coordinates */
 684         lincs_update_atoms(lincsd,th,1.0,blc_sol,r,invmass,xp);
 685     }
 686     /* nit*ncons*(37+9*nrec) flops */
 687
 688     if (v != NULL)
 689     {
 690         /* Update the velocities */
 691         lincs_update_atoms(lincsd,th,invdt,mlambda,r,invmass,v);
 692         /* 16 ncons flops */
 693     }
 694
 695     if (nlocat != NULL && bCalcLambda)
 696     {
 697         /* In lincs_update_atoms thread might cross-read mlambda */
 698 #pragma omp barrier
 699
 700         /* Only account for local atoms */
 701         for(b=b0; b<b1; b++)
 702         {
 703             mlambda[b] *= 0.5*nlocat[b];
 704         }
 705     }
 706
 707     if (bCalcVir)
 708     {
 709         /* Constraint virial */
 710         for(b=b0; b<b1; b++)
 711         {
 712             tmp0 = -bllen[b]*mlambda[b];
 713             for(i=0; i<DIM; i++)
 714             {
 715                 tmp1 = tmp0*r[b][i];
 716                 for(j=0; j<DIM; j++)
 717                 {
 718                     vir_r_m_dr[i][j] -= tmp1*r[b][j];
 719                 }
 720             }
 721         } /* 22 ncons flops */
 722     }
 723
 724     /* Total:
 725      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 726      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 727      *
 728      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 729      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 730      * if nit=1
 731      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 732      */
 733 }
 734
 735 void set_lincs_matrix(struct gmx_lincsdata *li,real *invmass,real lambda)
 736 {
 737     int i,a1,a2,n,k,sign,center;
 738     int end,nk,kk;
 739     const real invsqrt2=0.7071067811865475244;
 740
 741     for(i=0; (i<li->nc); i++)
 742     {
 743         a1 = li->bla[2*i];
 744         a2 = li->bla[2*i+1];
 745         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 746         li->blc1[i] = invsqrt2;
 747     }
 748
 749     /* Construct the coupling coefficient matrix blmf */
 750     li->ntriangle = 0;
 751     li->ncc_triangle = 0;
 752     for(i=0; (i<li->nc); i++)
 753     {
 754         a1 = li->bla[2*i];
 755         a2 = li->bla[2*i+1];
 756         for(n=li->blnr[i]; (n<li->blnr[i+1]); n++)
 757         {
 758             k = li->blbnb[n];
 759             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 760             {
 761                 sign = -1;
 762             }
 763             else
 764             {
 765                 sign = 1;
 766             }
 767             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 768             {
 769                 center = a1;
 770                 end    = a2;
 771             }
 772             else
 773             {
 774                 center = a2;
 775                 end    = a1;
 776             }
 777             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 778             li->blmf1[n] = sign*0.5;
 779             if (li->ncg_triangle > 0)
 780             {
 781                 /* Look for constraint triangles */
 782                 for(nk=li->blnr[k]; (nk<li->blnr[k+1]); nk++)
 783                 {
 784                     kk = li->blbnb[nk];
 785                     if (kk != i && kk != k &&
 786                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 787                     {
 788                         if (li->ntriangle == 0 ||
 789                             li->triangle[li->ntriangle-1] < i)
 790                         {
 791                             /* Add this constraint to the triangle list */
 792                             li->triangle[li->ntriangle] = i;
 793                             li->tri_bits[li->ntriangle] = 0;
 794                             li->ntriangle++;
 795                             if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 796                             {
 797                                 gmx_fatal(FARGS,"A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 798                                           li->blnr[i+1] - li->blnr[i],
 799                                           sizeof(li->tri_bits[0])*8-1);
 800                             }
 801                         }
 802                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 803                         li->ncc_triangle++;
 804                     }
 805                 }
 806             }
 807         }
 808     }
 809
 810     if (debug)
 811     {
 812         fprintf(debug,"Of the %d constraints %d participate in triangles\n",
 813                 li->nc,li->ntriangle);
 814         fprintf(debug,"There are %d couplings of which %d in triangles\n",
 815                 li->ncc,li->ncc_triangle);
 816     }
 817
 818     /* Set matlam,
 819      * so we know with which lambda value the masses have been set.
 820      */
 821     li->matlam = lambda;
 822 }
 823
 824 static int count_triangle_constraints(t_ilist *ilist,t_blocka *at2con)
 825 {
 826     int  ncon1,ncon_tot;
 827     int  c0,a00,a01,n1,c1,a10,a11,ac1,n2,c2,a20,a21;
 828     int  ncon_triangle;
 829     gmx_bool bTriangle;
 830     t_iatom *ia1,*ia2,*iap;
 831
 832     ncon1    = ilist[F_CONSTR].nr/3;
 833     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 834
 835     ia1 = ilist[F_CONSTR].iatoms;
 836     ia2 = ilist[F_CONSTRNC].iatoms;
 837
 838     ncon_triangle = 0;
 839     for(c0=0; c0<ncon_tot; c0++)
 840     {
 841         bTriangle = FALSE;
 842         iap = constr_iatomptr(ncon1,ia1,ia2,c0);
 843         a00 = iap[1];
 844         a01 = iap[2];
 845         for(n1=at2con->index[a01]; n1<at2con->index[a01+1]; n1++)
 846         {
 847             c1 = at2con->a[n1];
 848             if (c1 != c0)
 849             {
 850                 iap = constr_iatomptr(ncon1,ia1,ia2,c1);
 851                 a10 = iap[1];
 852                 a11 = iap[2];
 853                 if (a10 == a01)
 854                 {
 855                     ac1 = a11;
 856                 }
 857                 else
 858                 {
 859                     ac1 = a10;
 860                 }
 861                 for(n2=at2con->index[ac1]; n2<at2con->index[ac1+1]; n2++)
 862                 {
 863                     c2 = at2con->a[n2];
 864                     if (c2 != c0 && c2 != c1)
 865                     {
 866                         iap = constr_iatomptr(ncon1,ia1,ia2,c2);
 867                         a20 = iap[1];
 868                         a21 = iap[2];
 869                         if (a20 == a00 || a21 == a00)
 870                         {
 871                             bTriangle = TRUE;
 872                         }
 873                     }
 874                 }
 875             }
 876         }
 877         if (bTriangle)
 878         {
 879             ncon_triangle++;
 880         }
 881     }
 882
 883     return ncon_triangle;
 884 }
 885
 886 static gmx_bool more_than_two_sequential_constraints(const t_ilist *ilist,
 887                                                      const t_blocka *at2con)
 888 {
 889     t_iatom  *ia1,*ia2,*iap;
 890     int      ncon1,ncon_tot,c;
 891     int      a1,a2;
 892     gmx_bool bMoreThanTwoSequentialConstraints;
 893
 894     ncon1    = ilist[F_CONSTR].nr/3;
 895     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 896
 897     ia1 = ilist[F_CONSTR].iatoms;
 898     ia2 = ilist[F_CONSTRNC].iatoms;
 899
 900     bMoreThanTwoSequentialConstraints = FALSE;
 901     for(c=0; c<ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
 902     {
 903         iap = constr_iatomptr(ncon1,ia1,ia2,c);
 904         a1 = iap[1];
 905         a2 = iap[2];
 906         /* Check if this constraint has constraints connected at both atoms */
 907         if (at2con->index[a1+1] - at2con->index[a1] > 1 &&
 908             at2con->index[a2+1] - at2con->index[a2] > 1)
 909         {
 910             bMoreThanTwoSequentialConstraints = TRUE;
 911         }
 912     }
 913
 914     return bMoreThanTwoSequentialConstraints;
 915 }
 916
 917 static int int_comp(const void *a,const void *b)
 918 {
 919     return (*(int *)a) - (*(int *)b);
 920 }
 921
 922 gmx_lincsdata_t init_lincs(FILE *fplog,gmx_mtop_t *mtop,
 923                            int nflexcon_global,t_blocka *at2con,
 924                            gmx_bool bPLINCS,int nIter,int nProjOrder)
 925 {
 926     struct gmx_lincsdata *li;
 927     int mb;
 928     gmx_moltype_t *molt;
 929
 930     if (fplog)
 931     {
 932         fprintf(fplog,"\nInitializing%s LINear Constraint Solver\n",
 933                 bPLINCS ? " Parallel" : "");
 934     }
 935
 936     snew(li,1);
 937
 938     li->ncg      =
 939         gmx_mtop_ftype_count(mtop,F_CONSTR) +
 940         gmx_mtop_ftype_count(mtop,F_CONSTRNC);
 941     li->ncg_flex = nflexcon_global;
 942
 943     li->nIter  = nIter;
 944     li->nOrder = nProjOrder;
 945
 946     li->ncg_triangle = 0;
 947     li->bCommIter = FALSE;
 948     for(mb=0; mb<mtop->nmolblock; mb++)
 949     {
 950         molt = &mtop->moltype[mtop->molblock[mb].type];
 951         li->ncg_triangle +=
 952             mtop->molblock[mb].nmol*
 953             count_triangle_constraints(molt->ilist,
 954                                        &at2con[mtop->molblock[mb].type]);
 955         if (bPLINCS && li->bCommIter == FALSE)
 956         {
 957             /* Check if we need to communicate not only before LINCS,
 958              * but also before each iteration.
 959              * The check for only two sequential constraints is only
 960              * useful for the common case of H-bond only constraints.
 961              * With more effort we could also make it useful for small
 962              * molecules with nr. sequential constraints <= nOrder-1.
 963              */
 964             li->bCommIter = (li->nOrder < 1 || more_than_two_sequential_constraints(molt->ilist,&at2con[mtop->molblock[mb].type]));
 965         }
 966     }
 967     if (debug && bPLINCS)
 968     {
 969         fprintf(debug,"PLINCS communication before each iteration: %d\n",
 970                 li->bCommIter);
 971     }
 972
 973     /* LINCS can run on any number of threads.
 974      * Currently the number is fixed for the whole simulation,
 975      * but it could be set in set_lincs().
 976      */
 977     li->nth = gmx_omp_nthreads_get(emntLINCS);
 978     if (li->nth == 1)
 979     {
 980         snew(li->th,1);
 981     }
 982     else
 983     {
 984         /* Allocate an extra elements for "thread-overlap" constraints */
 985         snew(li->th,li->nth+1);
 986     }
 987     if (debug)
 988     {
 989         fprintf(debug,"LINCS: using %d threads\n",li->nth);
 990     }
 991
 992     if (bPLINCS || li->ncg_triangle > 0)
 993     {
 994         please_cite(fplog,"Hess2008a");
 995     }
 996     else
 997     {
 998         please_cite(fplog,"Hess97a");
 999     }
1000
1001     if (fplog)
1002     {
1003         fprintf(fplog,"The number of constraints is %d\n",li->ncg);
1004         if (bPLINCS)
1005         {
1006             fprintf(fplog,"There are inter charge-group constraints,\n"
1007                     "will communicate selected coordinates each lincs iteration\n");
1008         }
1009         if (li->ncg_triangle > 0)
1010         {
1011             fprintf(fplog,
1012                     "%d constraints are involved in constraint triangles,\n"
1013                     "will apply an additional matrix expansion of order %d for couplings\n"
1014                     "between constraints inside triangles\n",
1015                     li->ncg_triangle,li->nOrder);
1016         }
1017     }
1018
1019     return li;
1020 }
1021
1022 /* Sets up the work division over the threads */
1023 static void lincs_thread_setup(struct gmx_lincsdata *li,int natoms)
1024 {
1025     lincs_thread_t *li_m;
1026     int th;
1027     unsigned *atf;
1028     int a;
1029
1030     if (natoms > li->atf_nalloc)
1031     {
1032         li->atf_nalloc = over_alloc_large(natoms);
1033         srenew(li->atf,li->atf_nalloc);
1034     }
1035
1036     atf = li->atf;
1037     /* Clear the atom flags */
1038     for(a=0; a<natoms; a++)
1039     {
1040         atf[a] = 0;
1041     }
1042
1043     for(th=0; th<li->nth; th++)
1044     {
1045         lincs_thread_t *li_th;
1046         int b;
1047
1048         li_th = &li->th[th];
1049
1050         /* The constraints are divided equally over the threads */
1051         li_th->b0 = (li->nc* th   )/li->nth;
1052         li_th->b1 = (li->nc*(th+1))/li->nth;
1053
1054         if (th < sizeof(*atf)*8)
1055         {
1056             /* For each atom set a flag for constraints from each */
1057             for(b=li_th->b0; b<li_th->b1; b++)
1058             {
1059                 atf[li->bla[b*2]  ] |= (1U<<th);
1060                 atf[li->bla[b*2+1]] |= (1U<<th);
1061             }
1062         }
1063     }
1064
1065 #pragma omp parallel for num_threads(li->nth) schedule(static)
1066     for(th=0; th<li->nth; th++)
1067     {
1068         lincs_thread_t *li_th;
1069         unsigned mask;
1070         int b;
1071
1072         li_th = &li->th[th];
1073
1074         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1075         {
1076             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1077             srenew(li_th->ind,li_th->ind_nalloc);
1078             srenew(li_th->ind_r,li_th->ind_nalloc);
1079         }
1080
1081         if (th < sizeof(*atf)*8)
1082         {
1083             mask = (1U<<th) - 1U;
1084
1085             li_th->nind   = 0;
1086             li_th->nind_r = 0;
1087             for(b=li_th->b0; b<li_th->b1; b++)
1088             {
1089                 /* We let the constraint with the lowest thread index
1090                  * operate on atoms with constraints from multiple threads.
1091                  */
1092                 if (((atf[li->bla[b*2]]   & mask) == 0) &&
1093                     ((atf[li->bla[b*2+1]] & mask) == 0))
1094                 {
1095                     /* Add the constraint to the local atom update index */
1096                     li_th->ind[li_th->nind++] = b;
1097                 }
1098                 else
1099                 {
1100                     /* Add the constraint to the rest block */
1101                     li_th->ind_r[li_th->nind_r++] = b;
1102                 }
1103             }
1104         }
1105         else
1106         {
1107             /* We are out of bits, assign all constraints to rest */
1108             for(b=li_th->b0; b<li_th->b1; b++)
1109             {
1110                 li_th->ind_r[li_th->nind_r++] = b;
1111             }
1112         }
1113     }
1114
1115     /* We need to copy all constraints which have not be assigned
1116      * to a thread to a separate list which will be handled by one thread.
1117      */
1118     li_m = &li->th[li->nth];
1119
1120     li_m->nind = 0;
1121     for(th=0; th<li->nth; th++)
1122     {
1123         lincs_thread_t *li_th;
1124         int b;
1125
1126         li_th   = &li->th[th];
1127
1128         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1129         {
1130             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1131             srenew(li_m->ind,li_m->ind_nalloc);
1132         }
1133
1134         for(b=0; b<li_th->nind_r; b++)
1135         {
1136             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1137         }
1138
1139         if (debug)
1140         {
1141             fprintf(debug,"LINCS thread %d: %d constraints\n",
1142                     th,li_th->nind);
1143         }
1144     }
1145
1146     if (debug)
1147     {
1148         fprintf(debug,"LINCS thread r: %d constraints\n",
1149                 li_m->nind);
1150     }
1151 }
1152
1153
1154 void set_lincs(t_idef *idef,t_mdatoms *md,
1155                gmx_bool bDynamics,t_commrec *cr,
1156                struct gmx_lincsdata *li)
1157 {
1158     int      start,natoms,nflexcon;
1159     t_blocka at2con;
1160     t_iatom  *iatom;
1161     int      i,k,ncc_alloc,ni,con,nconnect,concon;
1162     int      type,a1,a2;
1163     real     lenA=0,lenB;
1164     gmx_bool     bLocal;
1165
1166     li->nc = 0;
1167     li->ncc = 0;
1168     /* Zero the thread index ranges.
1169      * Otherwise without local constraints we could return with old ranges.
1170      */
1171     for(i=0; i<li->nth; i++)
1172     {
1173         li->th[i].b0   = 0;
1174         li->th[i].b1   = 0;
1175         li->th[i].nind = 0;
1176     }
1177     if (li->nth > 1)
1178     {
1179         li->th[li->nth].nind = 0;
1180     }
1181
1182     /* This is the local topology, so there are only F_CONSTR constraints */
1183     if (idef->il[F_CONSTR].nr == 0)
1184     {
1185         /* There are no constraints,
1186          * we do not need to fill any data structures.
1187          */
1188         return;
1189     }
1190
1191     if (debug)
1192     {
1193         fprintf(debug,"Building the LINCS connectivity\n");
1194     }
1195
1196     if (DOMAINDECOMP(cr))
1197     {
1198         if (cr->dd->constraints)
1199         {
1200             dd_get_constraint_range(cr->dd,&start,&natoms);
1201         }
1202         else
1203         {
1204             natoms = cr->dd->nat_home;
1205         }
1206         start = 0;
1207     }
1208     else if(PARTDECOMP(cr))
1209         {
1210                 pd_get_constraint_range(cr->pd,&start,&natoms);
1211         }
1212         else
1213     {
1214         start  = md->start;
1215         natoms = md->homenr;
1216     }
1217     at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics,
1218                          &nflexcon);
1219
1220
1221     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1222     {
1223         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1224         srenew(li->bllen0,li->nc_alloc);
1225         srenew(li->ddist,li->nc_alloc);
1226         srenew(li->bla,2*li->nc_alloc);
1227         srenew(li->blc,li->nc_alloc);
1228         srenew(li->blc1,li->nc_alloc);
1229         srenew(li->blnr,li->nc_alloc+1);
1230         srenew(li->bllen,li->nc_alloc);
1231         srenew(li->tmpv,li->nc_alloc);
1232         srenew(li->tmp1,li->nc_alloc);
1233         srenew(li->tmp2,li->nc_alloc);
1234         srenew(li->tmp3,li->nc_alloc);
1235         srenew(li->tmp4,li->nc_alloc);
1236         srenew(li->mlambda,li->nc_alloc);
1237         if (li->ncg_triangle > 0)
1238         {
1239             /* This is allocating too much, but it is difficult to improve */
1240             srenew(li->triangle,li->nc_alloc);
1241             srenew(li->tri_bits,li->nc_alloc);
1242         }
1243     }
1244
1245     iatom = idef->il[F_CONSTR].iatoms;
1246
1247     ncc_alloc = li->ncc_alloc;
1248     li->blnr[0] = 0;
1249
1250     ni = idef->il[F_CONSTR].nr/3;
1251
1252     con = 0;
1253     nconnect = 0;
1254     li->blnr[con] = nconnect;
1255     for(i=0; i<ni; i++)
1256     {
1257         bLocal = TRUE;
1258         type = iatom[3*i];
1259         a1   = iatom[3*i+1];
1260         a2   = iatom[3*i+2];
1261         lenA = idef->iparams[type].constr.dA;
1262         lenB = idef->iparams[type].constr.dB;
1263         /* Skip the flexible constraints when not doing dynamics */
1264         if (bDynamics || lenA!=0 || lenB!=0)
1265         {
1266             li->bllen0[con]  = lenA;
1267             li->ddist[con]   = lenB - lenA;
1268             /* Set the length to the topology A length */
1269             li->bllen[con]   = li->bllen0[con];
1270             li->bla[2*con]   = a1;
1271             li->bla[2*con+1] = a2;
1272             /* Construct the constraint connection matrix blbnb */
1273             for(k=at2con.index[a1-start]; k<at2con.index[a1-start+1]; k++)
1274             {
1275                 concon = at2con.a[k];
1276                 if (concon != i)
1277                 {
1278                     if (nconnect >= ncc_alloc)
1279                     {
1280                         ncc_alloc = over_alloc_small(nconnect+1);
1281                         srenew(li->blbnb,ncc_alloc);
1282                     }
1283                     li->blbnb[nconnect++] = concon;
1284                 }
1285             }
1286             for(k=at2con.index[a2-start]; k<at2con.index[a2-start+1]; k++)
1287             {
1288                 concon = at2con.a[k];
1289                 if (concon != i)
1290                 {
1291                     if (nconnect+1 > ncc_alloc)
1292                     {
1293                         ncc_alloc = over_alloc_small(nconnect+1);
1294                         srenew(li->blbnb,ncc_alloc);
1295                     }
1296                     li->blbnb[nconnect++] = concon;
1297                 }
1298             }
1299             li->blnr[con+1] = nconnect;
1300
1301             if (cr->dd == NULL)
1302             {
1303                 /* Order the blbnb matrix to optimize memory access */
1304                 qsort(&(li->blbnb[li->blnr[con]]),li->blnr[con+1]-li->blnr[con],
1305                       sizeof(li->blbnb[0]),int_comp);
1306             }
1307             /* Increase the constraint count */
1308             con++;
1309         }
1310     }
1311
1312     done_blocka(&at2con);
1313
1314     /* This is the real number of constraints,
1315      * without dynamics the flexible constraints are not present.
1316      */
1317     li->nc = con;
1318
1319     li->ncc = li->blnr[con];
1320     if (cr->dd == NULL)
1321     {
1322         /* Since the matrix is static, we can free some memory */
1323         ncc_alloc = li->ncc;
1324         srenew(li->blbnb,ncc_alloc);
1325     }
1326
1327     if (ncc_alloc > li->ncc_alloc)
1328     {
1329         li->ncc_alloc = ncc_alloc;
1330         srenew(li->blmf,li->ncc_alloc);
1331         srenew(li->blmf1,li->ncc_alloc);
1332         srenew(li->tmpncc,li->ncc_alloc);
1333     }
1334
1335     if (debug)
1336     {
1337         fprintf(debug,"Number of constraints is %d, couplings %d\n",
1338                 li->nc,li->ncc);
1339     }
1340
1341     if (li->nth == 1)
1342     {
1343         li->th[0].b0 = 0;
1344         li->th[0].b1 = li->nc;
1345     }
1346     else
1347     {
1348         lincs_thread_setup(li,md->nr);
1349     }
1350
1351     set_lincs_matrix(li,md->invmass,md->lambda);
1352 }
1353
1354 static void lincs_warning(FILE *fplog,
1355                           gmx_domdec_t *dd,rvec *x,rvec *xprime,t_pbc *pbc,
1356                           int ncons,int *bla,real *bllen,real wangle,
1357                           int maxwarn,int *warncount)
1358 {
1359     int b,i,j;
1360     rvec v0,v1;
1361     real wfac,d0,d1,cosine;
1362     char buf[STRLEN];
1363
1364     wfac=cos(DEG2RAD*wangle);
1365
1366     sprintf(buf,"bonds that rotated more than %g degrees:\n"
1367             " atom 1 atom 2  angle  previous, current, constraint length\n",
1368             wangle);
1369     fprintf(stderr,"%s",buf);
1370     if (fplog)
1371     {
1372         fprintf(fplog,"%s",buf);
1373     }
1374
1375     for(b=0;b<ncons;b++)
1376     {
1377         i = bla[2*b];
1378         j = bla[2*b+1];
1379         if (pbc)
1380         {
1381             pbc_dx_aiuc(pbc,x[i],x[j],v0);
1382             pbc_dx_aiuc(pbc,xprime[i],xprime[j],v1);
1383         }
1384         else
1385         {
1386             rvec_sub(x[i],x[j],v0);
1387             rvec_sub(xprime[i],xprime[j],v1);
1388         }
1389         d0 = norm(v0);
1390         d1 = norm(v1);
1391         cosine = iprod(v0,v1)/(d0*d1);
1392         if (cosine < wfac)
1393         {
1394             sprintf(buf," %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1395                     ddglatnr(dd,i),ddglatnr(dd,j),
1396                     RAD2DEG*acos(cosine),d0,d1,bllen[b]);
1397             fprintf(stderr,"%s",buf);
1398             if (fplog)
1399             {
1400                 fprintf(fplog,"%s",buf);
1401             }
1402             if (!gmx_isfinite(d1))
1403             {
1404                 gmx_fatal(FARGS,"Bond length not finite.");
1405             }
1406
1407             (*warncount)++;
1408         }
1409     }
1410     if (*warncount > maxwarn)
1411     {
1412         too_many_constraint_warnings(econtLINCS,*warncount);
1413     }
1414 }
1415
1416 static void cconerr(gmx_domdec_t *dd,
1417                     int ncons,int *bla,real *bllen,rvec *x,t_pbc *pbc,
1418                     real *ncons_loc,real *ssd,real *max,int *imax)
1419 {
1420     real      len,d,ma,ssd2,r2;
1421     int       *nlocat,count,b,im;
1422     rvec      dx;
1423
1424     if (dd && dd->constraints)
1425     {
1426         nlocat = dd_constraints_nlocalatoms(dd);
1427     }
1428     else
1429     {
1430         nlocat = 0;
1431     }
1432
1433     ma = 0;
1434     ssd2 = 0;
1435     im = 0;
1436     count = 0;
1437     for(b=0;b<ncons;b++)
1438     {
1439         if (pbc)
1440         {
1441             pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
1442         }
1443         else {
1444             rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
1445         }
1446         r2 = norm2(dx);
1447         len = r2*gmx_invsqrt(r2);
1448         d = fabs(len/bllen[b]-1);
1449         if (d > ma && (nlocat==NULL || nlocat[b]))
1450         {
1451             ma = d;
1452             im = b;
1453         }
1454         if (nlocat == NULL)
1455         {
1456             ssd2 += d*d;
1457             count++;
1458         }
1459         else
1460         {
1461             ssd2 += nlocat[b]*d*d;
1462             count += nlocat[b];
1463         }
1464     }
1465
1466     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1467     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1468     *max = ma;
1469     *imax = im;
1470 }
1471
1472 static void dump_conf(gmx_domdec_t *dd,struct gmx_lincsdata *li,
1473                       t_blocka *at2con,
1474                       char *name,gmx_bool bAll,rvec *x,matrix box)
1475 {
1476     char str[STRLEN];
1477     FILE *fp;
1478     int  ac0,ac1,i;
1479
1480     dd_get_constraint_range(dd,&ac0,&ac1);
1481
1482     sprintf(str,"%s_%d_%d_%d.pdb",name,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
1483     fp = gmx_fio_fopen(str,"w");
1484     fprintf(fp,"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1\n",
1485             10*norm(box[XX]),10*norm(box[YY]),10*norm(box[ZZ]),
1486             90.0,90.0,90.0);
1487     for(i=0; i<ac1; i++)
1488     {
1489         if (i < dd->nat_home || (bAll && i >= ac0 && i < ac1))
1490         {
1491             fprintf(fp,"%-6s%5u  %-4.4s%3.3s %c%4d    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
1492                     "ATOM",ddglatnr(dd,i),"C","ALA",' ',i+1,
1493                     10*x[i][XX],10*x[i][YY],10*x[i][ZZ],
1494                     1.0,i<dd->nat_tot ? 0.0 : 1.0);
1495         }
1496     }
1497     if (bAll)
1498     {
1499         for(i=0; i<li->nc; i++)
1500         {
1501             fprintf(fp,"CONECT%5d%5d\n",
1502                     ddglatnr(dd,li->bla[2*i]),
1503                     ddglatnr(dd,li->bla[2*i+1]));
1504         }
1505     }
1506     gmx_fio_fclose(fp);
1507 }
1508
1509 gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
1510                          t_inputrec *ir,
1511                          gmx_large_int_t step,
1512                          struct gmx_lincsdata *lincsd,t_mdatoms *md,
1513                          t_commrec *cr,
1514                          rvec *x,rvec *xprime,rvec *min_proj,
1515                          matrix box,t_pbc *pbc,
1516                          real lambda,real *dvdlambda,
1517                          real invdt,rvec *v,
1518                          gmx_bool bCalcVir,tensor vir_r_m_dr,
1519                          int econq,
1520                          t_nrnb *nrnb,
1521                          int maxwarn,int *warncount)
1522 {
1523     char  buf[STRLEN],buf2[22],buf3[STRLEN];
1524     int   i,warn,p_imax,error;
1525     real  ncons_loc,p_ssd,p_max=0;
1526     rvec  dx;
1527     gmx_bool  bOK;
1528
1529     bOK = TRUE;
1530
1531     if (lincsd->nc == 0 && cr->dd == NULL)
1532     {
1533         if (bLog || bEner)
1534         {
1535             lincsd->rmsd_data[0] = 0;
1536             if (ir->eI == eiSD2 && v == NULL)
1537             {
1538                 i = 2;
1539             }
1540             else
1541             {
1542                 i = 1;
1543             }
1544             lincsd->rmsd_data[i] = 0;
1545         }
1546
1547         return bOK;
1548     }
1549
1550     if (econq == econqCoord)
1551     {
1552         if (ir->efep != efepNO)
1553         {
1554             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1555             {
1556                 set_lincs_matrix(lincsd,md->invmass,md->lambda);
1557             }
1558
1559             for(i=0; i<lincsd->nc; i++)
1560             {
1561                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1562             }
1563         }
1564
1565         if (lincsd->ncg_flex)
1566         {
1567             /* Set the flexible constraint lengths to the old lengths */
1568             if (pbc != NULL)
1569             {
1570                 for(i=0; i<lincsd->nc; i++)
1571                 {
1572                     if (lincsd->bllen[i] == 0) {
1573                         pbc_dx_aiuc(pbc,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx);
1574                         lincsd->bllen[i] = norm(dx);
1575                     }
1576                 }
1577             }
1578             else
1579             {
1580                 for(i=0; i<lincsd->nc; i++)
1581                 {
1582                     if (lincsd->bllen[i] == 0)
1583                     {
1584                         lincsd->bllen[i] =
1585                             sqrt(distance2(x[lincsd->bla[2*i]],
1586                                            x[lincsd->bla[2*i+1]]));
1587                     }
1588                 }
1589             }
1590         }
1591
1592         if (bLog && fplog)
1593         {
1594             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1595                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1596         }
1597
1598         /* This warn var can be updated by multiple threads
1599          * at the same time. But as we only need to detect
1600          * if a warning occured or not, this is not an issue.
1601          */
1602         warn = -1;
1603
1604         /* The OpenMP parallel region of constrain_lincs for coords */
1605 #pragma omp parallel num_threads(lincsd->nth)
1606         {
1607             int th=gmx_omp_get_thread_num();
1608
1609             clear_mat(lincsd->th[th].vir_r_m_dr);
1610
1611             do_lincs(x,xprime,box,pbc,lincsd,th,
1612                      md->invmass,cr,
1613                      bCalcVir || (ir->efep != efepNO),
1614                      ir->LincsWarnAngle,&warn,
1615                      invdt,v,bCalcVir,
1616                      th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1617         }
1618
1619         if (ir->efep != efepNO)
1620         {
1621             real dt_2,dvdl=0;
1622
1623             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
1624             for(i=0; (i<lincsd->nc); i++)
1625             {
1626                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
1627             }
1628             *dvdlambda += dvdl;
1629                 }
1630
1631         if (bLog && fplog && lincsd->nc > 0)
1632         {
1633             fprintf(fplog,"   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1634             fprintf(fplog,"       Before LINCS          %.6f    %.6f %6d %6d\n",
1635                     sqrt(p_ssd/ncons_loc),p_max,
1636                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1637                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1638         }
1639         if (bLog || bEner)
1640         {
1641             cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1642                     &ncons_loc,&p_ssd,&p_max,&p_imax);
1643             /* Check if we are doing the second part of SD */
1644             if (ir->eI == eiSD2 && v == NULL)
1645             {
1646                 i = 2;
1647             }
1648             else
1649             {
1650                 i = 1;
1651             }
1652             lincsd->rmsd_data[0] = ncons_loc;
1653             lincsd->rmsd_data[i] = p_ssd;
1654         }
1655         else
1656         {
1657             lincsd->rmsd_data[0] = 0;
1658             lincsd->rmsd_data[1] = 0;
1659             lincsd->rmsd_data[2] = 0;
1660         }
1661         if (bLog && fplog && lincsd->nc > 0)
1662         {
1663             fprintf(fplog,
1664                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1665                     sqrt(p_ssd/ncons_loc),p_max,
1666                     ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1667                     ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1668         }
1669
1670         if (warn >= 0)
1671         {
1672             if (maxwarn >= 0)
1673             {
1674                 cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1675                         &ncons_loc,&p_ssd,&p_max,&p_imax);
1676                 if (MULTISIM(cr))
1677                 {
1678                     sprintf(buf3," in simulation %d", cr->ms->sim);
1679                 }
1680                 else
1681                 {
1682                     buf3[0] = 0;
1683                 }
1684                 sprintf(buf,"\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1685                         "relative constraint deviation after LINCS:\n"
1686                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1687                         gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t,
1688                         buf3,
1689                         sqrt(p_ssd/ncons_loc),p_max,
1690                         ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1691                         ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1692                 if (fplog)
1693                 {
1694                     fprintf(fplog,"%s",buf);
1695                 }
1696                 fprintf(stderr,"%s",buf);
1697                 lincs_warning(fplog,cr->dd,x,xprime,pbc,
1698                               lincsd->nc,lincsd->bla,lincsd->bllen,
1699                               ir->LincsWarnAngle,maxwarn,warncount);
1700             }
1701             bOK = (p_max < 0.5);
1702         }
1703
1704         if (lincsd->ncg_flex) {
1705             for(i=0; (i<lincsd->nc); i++)
1706                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1707                     lincsd->bllen[i] = 0;
1708         }
1709     }
1710     else
1711     {
1712         /* The OpenMP parallel region of constrain_lincs for derivatives */
1713 #pragma omp parallel num_threads(lincsd->nth)
1714         {
1715             int th=gmx_omp_get_thread_num();
1716
1717             do_lincsp(x,xprime,min_proj,pbc,lincsd,th,
1718                       md->invmass,econq,ir->efep != efepNO ? dvdlambda : NULL,
1719                       bCalcVir,th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1720         }
1721     }
1722
1723     if (bCalcVir && lincsd->nth > 1)
1724     {
1725         for(i=1; i<lincsd->nth; i++)
1726         {
1727             m_add(vir_r_m_dr,lincsd->th[i].vir_r_m_dr,vir_r_m_dr);
1728         }
1729     }
1730
1731     /* count assuming nit=1 */
1732     inc_nrnb(nrnb,eNR_LINCS,lincsd->nc);
1733     inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc);
1734     if (lincsd->ntriangle > 0)
1735     {
1736         inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle);
1737     }
1738     if (v)
1739     {
1740         inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2);
1741     }
1742     if (bCalcVir)
1743     {
1744         inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc);
1745     }
1746
1747     return bOK;
1748 }