src/gromacs/mdlib/clincs.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <math.h>
  43 #include "main.h"
  44 #include "constr.h"
  45 #include "copyrite.h"
  46 #include "physics.h"
  47 #include "vec.h"
  48 #include "pbc.h"
  49 #include "gromacs/utility/smalloc.h"
  50 #include "mdrun.h"
  51 #include "nrnb.h"
  52 #include "domdec.h"
  53 #include "mtop_util.h"
  54 #include "gmx_omp_nthreads.h"
  55
  56 #include "gromacs/fileio/gmxfio.h"
  57 #include "gmx_fatal.h"
  58 #include "gromacs/utility/gmxomp.h"
  59
  60 typedef struct {
  61     int    b0;         /* first constraint for this thread */
  62     int    b1;         /* b1-1 is the last constraint for this thread */
  63     int    nind;       /* number of indices */
  64     int   *ind;        /* constraint index for updating atom data */
  65     int    nind_r;     /* number of indices */
  66     int   *ind_r;      /* constraint index for updating atom data */
  67     int    ind_nalloc; /* allocation size of ind and ind_r */
  68     tensor vir_r_m_dr; /* temporary variable for virial calculation */
  69     real   dhdlambda;  /* temporary variable for lambda derivative */
  70 } lincs_thread_t;
  71
  72 typedef struct gmx_lincsdata {
  73     int             ncg;          /* the global number of constraints */
  74     int             ncg_flex;     /* the global number of flexible constraints */
  75     int             ncg_triangle; /* the global number of constraints in triangles */
  76     int             nIter;        /* the number of iterations */
  77     int             nOrder;       /* the order of the matrix expansion */
  78     int             nc;           /* the number of constraints */
  79     int             nc_alloc;     /* the number we allocated memory for */
  80     int             ncc;          /* the number of constraint connections */
  81     int             ncc_alloc;    /* the number we allocated memory for */
  82     real            matlam;       /* the FE lambda value used for filling blc and blmf */
  83     real           *bllen0;       /* the reference distance in topology A */
  84     real           *ddist;        /* the reference distance in top B - the r.d. in top A */
  85     int            *bla;          /* the atom pairs involved in the constraints */
  86     real           *blc;          /* 1/sqrt(invmass1 + invmass2) */
  87     real           *blc1;         /* as blc, but with all masses 1 */
  88     int            *blnr;         /* index into blbnb and blmf */
  89     int            *blbnb;        /* list of constraint connections */
  90     int             ntriangle;    /* the local number of constraints in triangles */
  91     int            *triangle;     /* the list of triangle constraints */
  92     int            *tri_bits;     /* the bits tell if the matrix element should be used */
  93     int             ncc_triangle; /* the number of constraint connections in triangles */
  94     gmx_bool        bCommIter;    /* communicate before each LINCS interation */
  95     real           *blmf;         /* matrix of mass factors for constraint connections */
  96     real           *blmf1;        /* as blmf, but with all masses 1 */
  97     real           *bllen;        /* the reference bond length */
  98     int             nth;          /* The number of threads doing LINCS */
  99     lincs_thread_t *th;           /* LINCS thread division */
 100     unsigned       *atf;          /* atom flags for thread parallelization */
 101     int             atf_nalloc;   /* allocation size of atf */
 102     /* arrays for temporary storage in the LINCS algorithm */
 103     rvec           *tmpv;
 104     real           *tmpncc;
 105     real           *tmp1;
 106     real           *tmp2;
 107     real           *tmp3;
 108     real           *tmp4;
 109     real           *mlambda; /* the Lagrange multipliers * -1 */
 110     /* storage for the constraint RMS relative deviation output */
 111     real            rmsd_data[3];
 112 } t_gmx_lincsdata;
 113
 114 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 115 {
 116     return lincsd->rmsd_data;
 117 }
 118
 119 real lincs_rmsd(struct gmx_lincsdata *lincsd, gmx_bool bSD2)
 120 {
 121     if (lincsd->rmsd_data[0] > 0)
 122     {
 123         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 124     }
 125     else
 126     {
 127         return 0;
 128     }
 129 }
 130
 131 /* Do a set of nrec LINCS matrix multiplications.
 132  * This function will return with up to date thread-local
 133  * constraint data, without an OpenMP barrier.
 134  */
 135 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 136                                 int b0, int b1,
 137                                 const real *blcc,
 138                                 real *rhs1, real *rhs2, real *sol)
 139 {
 140     int        nrec, rec, b, j, n, nr0, nr1;
 141     real       mvb, *swap;
 142     int        ntriangle, tb, bits;
 143     const int *blnr     = lincsd->blnr, *blbnb = lincsd->blbnb;
 144     const int *triangle = lincsd->triangle, *tri_bits = lincsd->tri_bits;
 145
 146     ntriangle = lincsd->ntriangle;
 147     nrec      = lincsd->nOrder;
 148
 149     for (rec = 0; rec < nrec; rec++)
 150     {
 151 #pragma omp barrier
 152         for (b = b0; b < b1; b++)
 153         {
 154             mvb = 0;
 155             for (n = blnr[b]; n < blnr[b+1]; n++)
 156             {
 157                 j   = blbnb[n];
 158                 mvb = mvb + blcc[n]*rhs1[j];
 159             }
 160             rhs2[b] = mvb;
 161             sol[b]  = sol[b] + mvb;
 162         }
 163         swap = rhs1;
 164         rhs1 = rhs2;
 165         rhs2 = swap;
 166     } /* nrec*(ncons+2*nrtot) flops */
 167
 168     if (ntriangle > 0)
 169     {
 170         /* Perform an extra nrec recursions for only the constraints
 171          * involved in rigid triangles.
 172          * In this way their accuracy should come close to those of the other
 173          * constraints, since traingles of constraints can produce eigenvalues
 174          * around 0.7, while the effective eigenvalue for bond constraints
 175          * is around 0.4 (and 0.7*0.7=0.5).
 176          */
 177         /* We need to copy the temporary array, since only the elements
 178          * for constraints involved in triangles are updated and then
 179          * the pointers are swapped. This saving copying the whole arrary.
 180          * We need barrier as other threads might still be reading from rhs2.
 181          */
 182 #pragma omp barrier
 183         for (b = b0; b < b1; b++)
 184         {
 185             rhs2[b] = rhs1[b];
 186         }
 187 #pragma omp barrier
 188 #pragma omp master
 189         {
 190             for (rec = 0; rec < nrec; rec++)
 191             {
 192                 for (tb = 0; tb < ntriangle; tb++)
 193                 {
 194                     b    = triangle[tb];
 195                     bits = tri_bits[tb];
 196                     mvb  = 0;
 197                     nr0  = blnr[b];
 198                     nr1  = blnr[b+1];
 199                     for (n = nr0; n < nr1; n++)
 200                     {
 201                         if (bits & (1<<(n-nr0)))
 202                         {
 203                             j   = blbnb[n];
 204                             mvb = mvb + blcc[n]*rhs1[j];
 205                         }
 206                     }
 207                     rhs2[b] = mvb;
 208                     sol[b]  = sol[b] + mvb;
 209                 }
 210                 swap = rhs1;
 211                 rhs1 = rhs2;
 212                 rhs2 = swap;
 213             }
 214         } /* flops count is missing here */
 215
 216         /* We need a barrier here as the calling routine will continue
 217          * to operate on the thread-local constraints without barrier.
 218          */
 219 #pragma omp barrier
 220     }
 221 }
 222
 223 static void lincs_update_atoms_noind(int ncons, const int *bla,
 224                                      real prefac,
 225                                      const real *fac, rvec *r,
 226                                      const real *invmass,
 227                                      rvec *x)
 228 {
 229     int  b, i, j;
 230     real mvb, im1, im2, tmp0, tmp1, tmp2;
 231
 232     if (invmass != NULL)
 233     {
 234         for (b = 0; b < ncons; b++)
 235         {
 236             i        = bla[2*b];
 237             j        = bla[2*b+1];
 238             mvb      = prefac*fac[b];
 239             im1      = invmass[i];
 240             im2      = invmass[j];
 241             tmp0     = r[b][0]*mvb;
 242             tmp1     = r[b][1]*mvb;
 243             tmp2     = r[b][2]*mvb;
 244             x[i][0] -= tmp0*im1;
 245             x[i][1] -= tmp1*im1;
 246             x[i][2] -= tmp2*im1;
 247             x[j][0] += tmp0*im2;
 248             x[j][1] += tmp1*im2;
 249             x[j][2] += tmp2*im2;
 250         } /* 16 ncons flops */
 251     }
 252     else
 253     {
 254         for (b = 0; b < ncons; b++)
 255         {
 256             i        = bla[2*b];
 257             j        = bla[2*b+1];
 258             mvb      = prefac*fac[b];
 259             tmp0     = r[b][0]*mvb;
 260             tmp1     = r[b][1]*mvb;
 261             tmp2     = r[b][2]*mvb;
 262             x[i][0] -= tmp0;
 263             x[i][1] -= tmp1;
 264             x[i][2] -= tmp2;
 265             x[j][0] += tmp0;
 266             x[j][1] += tmp1;
 267             x[j][2] += tmp2;
 268         }
 269     }
 270 }
 271
 272 static void lincs_update_atoms_ind(int ncons, const int *ind, const int *bla,
 273                                    real prefac,
 274                                    const real *fac, rvec *r,
 275                                    const real *invmass,
 276                                    rvec *x)
 277 {
 278     int  bi, b, i, j;
 279     real mvb, im1, im2, tmp0, tmp1, tmp2;
 280
 281     if (invmass != NULL)
 282     {
 283         for (bi = 0; bi < ncons; bi++)
 284         {
 285             b        = ind[bi];
 286             i        = bla[2*b];
 287             j        = bla[2*b+1];
 288             mvb      = prefac*fac[b];
 289             im1      = invmass[i];
 290             im2      = invmass[j];
 291             tmp0     = r[b][0]*mvb;
 292             tmp1     = r[b][1]*mvb;
 293             tmp2     = r[b][2]*mvb;
 294             x[i][0] -= tmp0*im1;
 295             x[i][1] -= tmp1*im1;
 296             x[i][2] -= tmp2*im1;
 297             x[j][0] += tmp0*im2;
 298             x[j][1] += tmp1*im2;
 299             x[j][2] += tmp2*im2;
 300         } /* 16 ncons flops */
 301     }
 302     else
 303     {
 304         for (bi = 0; bi < ncons; bi++)
 305         {
 306             b        = ind[bi];
 307             i        = bla[2*b];
 308             j        = bla[2*b+1];
 309             mvb      = prefac*fac[b];
 310             tmp0     = r[b][0]*mvb;
 311             tmp1     = r[b][1]*mvb;
 312             tmp2     = r[b][2]*mvb;
 313             x[i][0] -= tmp0;
 314             x[i][1] -= tmp1;
 315             x[i][2] -= tmp2;
 316             x[j][0] += tmp0;
 317             x[j][1] += tmp1;
 318             x[j][2] += tmp2;
 319         } /* 16 ncons flops */
 320     }
 321 }
 322
 323 static void lincs_update_atoms(struct gmx_lincsdata *li, int th,
 324                                real prefac,
 325                                const real *fac, rvec *r,
 326                                const real *invmass,
 327                                rvec *x)
 328 {
 329     if (li->nth == 1)
 330     {
 331         /* Single thread, we simply update for all constraints */
 332         lincs_update_atoms_noind(li->nc, li->bla, prefac, fac, r, invmass, x);
 333     }
 334     else
 335     {
 336         /* Update the atom vector components for our thread local
 337          * constraints that only access our local atom range.
 338          * This can be done without a barrier.
 339          */
 340         lincs_update_atoms_ind(li->th[th].nind, li->th[th].ind,
 341                                li->bla, prefac, fac, r, invmass, x);
 342
 343         if (li->th[li->nth].nind > 0)
 344         {
 345             /* Update the constraints that operate on atoms
 346              * in multiple thread atom blocks on the master thread.
 347              */
 348 #pragma omp barrier
 349 #pragma omp master
 350             {
 351                 lincs_update_atoms_ind(li->th[li->nth].nind,
 352                                        li->th[li->nth].ind,
 353                                        li->bla, prefac, fac, r, invmass, x);
 354             }
 355         }
 356     }
 357 }
 358
 359 /* LINCS projection, works on derivatives of the coordinates */
 360 static void do_lincsp(rvec *x, rvec *f, rvec *fp, t_pbc *pbc,
 361                       struct gmx_lincsdata *lincsd, int th,
 362                       real *invmass,
 363                       int econq, gmx_bool bCalcDHDL,
 364                       gmx_bool bCalcVir, tensor rmdf)
 365 {
 366     int      b0, b1, b, i, j, k, n;
 367     real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, wfac, lam;
 368     rvec     dx;
 369     int     *bla, *blnr, *blbnb;
 370     rvec    *r;
 371     real    *blc, *blmf, *blcc, *rhs1, *rhs2, *sol;
 372
 373     b0 = lincsd->th[th].b0;
 374     b1 = lincsd->th[th].b1;
 375
 376     bla    = lincsd->bla;
 377     r      = lincsd->tmpv;
 378     blnr   = lincsd->blnr;
 379     blbnb  = lincsd->blbnb;
 380     if (econq != econqForce)
 381     {
 382         /* Use mass-weighted parameters */
 383         blc  = lincsd->blc;
 384         blmf = lincsd->blmf;
 385     }
 386     else
 387     {
 388         /* Use non mass-weighted parameters */
 389         blc  = lincsd->blc1;
 390         blmf = lincsd->blmf1;
 391     }
 392     blcc   = lincsd->tmpncc;
 393     rhs1   = lincsd->tmp1;
 394     rhs2   = lincsd->tmp2;
 395     sol    = lincsd->tmp3;
 396
 397     /* Compute normalized i-j vectors */
 398     if (pbc)
 399     {
 400         for (b = b0; b < b1; b++)
 401         {
 402             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 403             unitv(dx, r[b]);
 404         }
 405     }
 406     else
 407     {
 408         for (b = b0; b < b1; b++)
 409         {
 410             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
 411             unitv(dx, r[b]);
 412         } /* 16 ncons flops */
 413     }
 414
 415 #pragma omp barrier
 416     for (b = b0; b < b1; b++)
 417     {
 418         tmp0 = r[b][0];
 419         tmp1 = r[b][1];
 420         tmp2 = r[b][2];
 421         i    = bla[2*b];
 422         j    = bla[2*b+1];
 423         for (n = blnr[b]; n < blnr[b+1]; n++)
 424         {
 425             k       = blbnb[n];
 426             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 427         } /* 6 nr flops */
 428         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 429                       tmp1*(f[i][1] - f[j][1]) +
 430                       tmp2*(f[i][2] - f[j][2]));
 431         rhs1[b] = mvb;
 432         sol[b]  = mvb;
 433         /* 7 flops */
 434     }
 435     /* Together: 23*ncons + 6*nrtot flops */
 436
 437     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 438     /* nrec*(ncons+2*nrtot) flops */
 439
 440     if (econq == econqDeriv_FlexCon)
 441     {
 442         /* We only want to constraint the flexible constraints,
 443          * so we mask out the normal ones by setting sol to 0.
 444          */
 445         for (b = b0; b < b1; b++)
 446         {
 447             if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 448             {
 449                 sol[b] = 0;
 450             }
 451         }
 452     }
 453
 454     /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
 455     for (b = b0; b < b1; b++)
 456     {
 457         sol[b] *= blc[b];
 458     }
 459
 460     /* When constraining forces, we should not use mass weighting,
 461      * so we pass invmass=NULL, which results in the use of 1 for all atoms.
 462      */
 463     lincs_update_atoms(lincsd, th, 1.0, sol, r,
 464                        (econq != econqForce) ? invmass : NULL, fp);
 465
 466     if (bCalcDHDL)
 467     {
 468         real dhdlambda;
 469
 470         dhdlambda = 0;
 471         for (b = b0; b < b1; b++)
 472         {
 473             dhdlambda -= sol[b]*lincsd->ddist[b];
 474         }
 475
 476         lincsd->th[th].dhdlambda = dhdlambda;
 477     }
 478
 479     if (bCalcVir)
 480     {
 481         /* Constraint virial,
 482          * determines sum r_bond x delta f,
 483          * where delta f is the constraint correction
 484          * of the quantity that is being constrained.
 485          */
 486         for (b = b0; b < b1; b++)
 487         {
 488             mvb = lincsd->bllen[b]*sol[b];
 489             for (i = 0; i < DIM; i++)
 490             {
 491                 tmp1 = mvb*r[b][i];
 492                 for (j = 0; j < DIM; j++)
 493                 {
 494                     rmdf[i][j] += tmp1*r[b][j];
 495                 }
 496             }
 497         } /* 23 ncons flops */
 498     }
 499 }
 500
 501 static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
 502                      struct gmx_lincsdata *lincsd, int th,
 503                      real *invmass,
 504                      t_commrec *cr,
 505                      gmx_bool bCalcDHDL,
 506                      real wangle, int *warn,
 507                      real invdt, rvec *v,
 508                      gmx_bool bCalcVir, tensor vir_r_m_dr)
 509 {
 510     int      b0, b1, b, i, j, k, n, iter;
 511     real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, len2, dlen2, wfac;
 512     rvec     dx;
 513     int     *bla, *blnr, *blbnb;
 514     rvec    *r;
 515     real    *blc, *blmf, *bllen, *blcc, *rhs1, *rhs2, *sol, *blc_sol, *mlambda;
 516     int     *nlocat;
 517
 518     b0 = lincsd->th[th].b0;
 519     b1 = lincsd->th[th].b1;
 520
 521     bla     = lincsd->bla;
 522     r       = lincsd->tmpv;
 523     blnr    = lincsd->blnr;
 524     blbnb   = lincsd->blbnb;
 525     blc     = lincsd->blc;
 526     blmf    = lincsd->blmf;
 527     bllen   = lincsd->bllen;
 528     blcc    = lincsd->tmpncc;
 529     rhs1    = lincsd->tmp1;
 530     rhs2    = lincsd->tmp2;
 531     sol     = lincsd->tmp3;
 532     blc_sol = lincsd->tmp4;
 533     mlambda = lincsd->mlambda;
 534
 535     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 536     {
 537         nlocat = dd_constraints_nlocalatoms(cr->dd);
 538     }
 539     else
 540     {
 541         nlocat = NULL;
 542     }
 543
 544     if (pbc)
 545     {
 546         /* Compute normalized i-j vectors */
 547         for (b = b0; b < b1; b++)
 548         {
 549             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 550             unitv(dx, r[b]);
 551         }
 552 #pragma omp barrier
 553         for (b = b0; b < b1; b++)
 554         {
 555             for (n = blnr[b]; n < blnr[b+1]; n++)
 556             {
 557                 blcc[n] = blmf[n]*iprod(r[b], r[blbnb[n]]);
 558             }
 559             pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 560             mvb     = blc[b]*(iprod(r[b], dx) - bllen[b]);
 561             rhs1[b] = mvb;
 562             sol[b]  = mvb;
 563         }
 564     }
 565     else
 566     {
 567         /* Compute normalized i-j vectors */
 568         for (b = b0; b < b1; b++)
 569         {
 570             i       = bla[2*b];
 571             j       = bla[2*b+1];
 572             tmp0    = x[i][0] - x[j][0];
 573             tmp1    = x[i][1] - x[j][1];
 574             tmp2    = x[i][2] - x[j][2];
 575             rlen    = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 576             r[b][0] = rlen*tmp0;
 577             r[b][1] = rlen*tmp1;
 578             r[b][2] = rlen*tmp2;
 579         } /* 16 ncons flops */
 580
 581 #pragma omp barrier
 582         for (b = b0; b < b1; b++)
 583         {
 584             tmp0 = r[b][0];
 585             tmp1 = r[b][1];
 586             tmp2 = r[b][2];
 587             len  = bllen[b];
 588             i    = bla[2*b];
 589             j    = bla[2*b+1];
 590             for (n = blnr[b]; n < blnr[b+1]; n++)
 591             {
 592                 k       = blbnb[n];
 593                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 594             } /* 6 nr flops */
 595             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 596                           tmp1*(xp[i][1] - xp[j][1]) +
 597                           tmp2*(xp[i][2] - xp[j][2]) - len);
 598             rhs1[b] = mvb;
 599             sol[b]  = mvb;
 600             /* 10 flops */
 601         }
 602         /* Together: 26*ncons + 6*nrtot flops */
 603     }
 604
 605     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 606     /* nrec*(ncons+2*nrtot) flops */
 607
 608     for (b = b0; b < b1; b++)
 609     {
 610         mlambda[b] = blc[b]*sol[b];
 611     }
 612
 613     /* Update the coordinates */
 614     lincs_update_atoms(lincsd, th, 1.0, mlambda, r, invmass, xp);
 615
 616     /*
 617      ********  Correction for centripetal effects  ********
 618      */
 619
 620     wfac = cos(DEG2RAD*wangle);
 621     wfac = wfac*wfac;
 622
 623     for (iter = 0; iter < lincsd->nIter; iter++)
 624     {
 625         if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints))
 626         {
 627 #pragma omp barrier
 628 #pragma omp master
 629             {
 630                 /* Communicate the corrected non-local coordinates */
 631                 if (DOMAINDECOMP(cr))
 632                 {
 633                     dd_move_x_constraints(cr->dd, box, xp, NULL, FALSE);
 634                 }
 635             }
 636         }
 637
 638 #pragma omp barrier
 639         for (b = b0; b < b1; b++)
 640         {
 641             len = bllen[b];
 642             if (pbc)
 643             {
 644                 pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 645             }
 646             else
 647             {
 648                 rvec_sub(xp[bla[2*b]], xp[bla[2*b+1]], dx);
 649             }
 650             len2  = len*len;
 651             dlen2 = 2*len2 - norm2(dx);
 652             if (dlen2 < wfac*len2 && (nlocat == NULL || nlocat[b]))
 653             {
 654                 *warn = b;
 655             }
 656             if (dlen2 > 0)
 657             {
 658                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 659             }
 660             else
 661             {
 662                 mvb = blc[b]*len;
 663             }
 664             rhs1[b] = mvb;
 665             sol[b]  = mvb;
 666         } /* 20*ncons flops */
 667
 668         lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 669         /* nrec*(ncons+2*nrtot) flops */
 670
 671         for (b = b0; b < b1; b++)
 672         {
 673             mvb         = blc[b]*sol[b];
 674             blc_sol[b]  = mvb;
 675             mlambda[b] += mvb;
 676         }
 677
 678         /* Update the coordinates */
 679         lincs_update_atoms(lincsd, th, 1.0, blc_sol, r, invmass, xp);
 680     }
 681     /* nit*ncons*(37+9*nrec) flops */
 682
 683     if (v != NULL)
 684     {
 685         /* Update the velocities */
 686         lincs_update_atoms(lincsd, th, invdt, mlambda, r, invmass, v);
 687         /* 16 ncons flops */
 688     }
 689
 690     if (nlocat != NULL && (bCalcDHDL || bCalcVir))
 691     {
 692         /* In lincs_update_atoms threads might cross-read mlambda */
 693 #pragma omp barrier
 694
 695         /* Only account for local atoms */
 696         for (b = b0; b < b1; b++)
 697         {
 698             mlambda[b] *= 0.5*nlocat[b];
 699         }
 700     }
 701
 702     if (bCalcDHDL)
 703     {
 704         real dhdl;
 705
 706         dhdl = 0;
 707         for (b = b0; b < b1; b++)
 708         {
 709             /* Note that this this is dhdl*dt^2, the dt^2 factor is corrected
 710              * later after the contributions are reduced over the threads.
 711              */
 712             dhdl -= lincsd->mlambda[b]*lincsd->ddist[b];
 713         }
 714         lincsd->th[th].dhdlambda = dhdl;
 715     }
 716
 717     if (bCalcVir)
 718     {
 719         /* Constraint virial */
 720         for (b = b0; b < b1; b++)
 721         {
 722             tmp0 = -bllen[b]*mlambda[b];
 723             for (i = 0; i < DIM; i++)
 724             {
 725                 tmp1 = tmp0*r[b][i];
 726                 for (j = 0; j < DIM; j++)
 727                 {
 728                     vir_r_m_dr[i][j] -= tmp1*r[b][j];
 729                 }
 730             }
 731         } /* 22 ncons flops */
 732     }
 733
 734     /* Total:
 735      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 736      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 737      *
 738      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 739      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 740      * if nit=1
 741      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 742      */
 743 }
 744
 745 void set_lincs_matrix(struct gmx_lincsdata *li, real *invmass, real lambda)
 746 {
 747     int        i, a1, a2, n, k, sign, center;
 748     int        end, nk, kk;
 749     const real invsqrt2 = 0.7071067811865475244;
 750
 751     for (i = 0; (i < li->nc); i++)
 752     {
 753         a1          = li->bla[2*i];
 754         a2          = li->bla[2*i+1];
 755         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 756         li->blc1[i] = invsqrt2;
 757     }
 758
 759     /* Construct the coupling coefficient matrix blmf */
 760     li->ntriangle    = 0;
 761     li->ncc_triangle = 0;
 762     for (i = 0; (i < li->nc); i++)
 763     {
 764         a1 = li->bla[2*i];
 765         a2 = li->bla[2*i+1];
 766         for (n = li->blnr[i]; (n < li->blnr[i+1]); n++)
 767         {
 768             k = li->blbnb[n];
 769             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 770             {
 771                 sign = -1;
 772             }
 773             else
 774             {
 775                 sign = 1;
 776             }
 777             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 778             {
 779                 center = a1;
 780                 end    = a2;
 781             }
 782             else
 783             {
 784                 center = a2;
 785                 end    = a1;
 786             }
 787             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 788             li->blmf1[n] = sign*0.5;
 789             if (li->ncg_triangle > 0)
 790             {
 791                 /* Look for constraint triangles */
 792                 for (nk = li->blnr[k]; (nk < li->blnr[k+1]); nk++)
 793                 {
 794                     kk = li->blbnb[nk];
 795                     if (kk != i && kk != k &&
 796                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 797                     {
 798                         if (li->ntriangle == 0 ||
 799                             li->triangle[li->ntriangle-1] < i)
 800                         {
 801                             /* Add this constraint to the triangle list */
 802                             li->triangle[li->ntriangle] = i;
 803                             li->tri_bits[li->ntriangle] = 0;
 804                             li->ntriangle++;
 805                             if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 806                             {
 807                                 gmx_fatal(FARGS, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 808                                           li->blnr[i+1] - li->blnr[i],
 809                                           sizeof(li->tri_bits[0])*8-1);
 810                             }
 811                         }
 812                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 813                         li->ncc_triangle++;
 814                     }
 815                 }
 816             }
 817         }
 818     }
 819
 820     if (debug)
 821     {
 822         fprintf(debug, "Of the %d constraints %d participate in triangles\n",
 823                 li->nc, li->ntriangle);
 824         fprintf(debug, "There are %d couplings of which %d in triangles\n",
 825                 li->ncc, li->ncc_triangle);
 826     }
 827
 828     /* Set matlam,
 829      * so we know with which lambda value the masses have been set.
 830      */
 831     li->matlam = lambda;
 832 }
 833
 834 static int count_triangle_constraints(t_ilist *ilist, t_blocka *at2con)
 835 {
 836     int      ncon1, ncon_tot;
 837     int      c0, a00, a01, n1, c1, a10, a11, ac1, n2, c2, a20, a21;
 838     int      ncon_triangle;
 839     gmx_bool bTriangle;
 840     t_iatom *ia1, *ia2, *iap;
 841
 842     ncon1    = ilist[F_CONSTR].nr/3;
 843     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 844
 845     ia1 = ilist[F_CONSTR].iatoms;
 846     ia2 = ilist[F_CONSTRNC].iatoms;
 847
 848     ncon_triangle = 0;
 849     for (c0 = 0; c0 < ncon_tot; c0++)
 850     {
 851         bTriangle = FALSE;
 852         iap       = constr_iatomptr(ncon1, ia1, ia2, c0);
 853         a00       = iap[1];
 854         a01       = iap[2];
 855         for (n1 = at2con->index[a01]; n1 < at2con->index[a01+1]; n1++)
 856         {
 857             c1 = at2con->a[n1];
 858             if (c1 != c0)
 859             {
 860                 iap = constr_iatomptr(ncon1, ia1, ia2, c1);
 861                 a10 = iap[1];
 862                 a11 = iap[2];
 863                 if (a10 == a01)
 864                 {
 865                     ac1 = a11;
 866                 }
 867                 else
 868                 {
 869                     ac1 = a10;
 870                 }
 871                 for (n2 = at2con->index[ac1]; n2 < at2con->index[ac1+1]; n2++)
 872                 {
 873                     c2 = at2con->a[n2];
 874                     if (c2 != c0 && c2 != c1)
 875                     {
 876                         iap = constr_iatomptr(ncon1, ia1, ia2, c2);
 877                         a20 = iap[1];
 878                         a21 = iap[2];
 879                         if (a20 == a00 || a21 == a00)
 880                         {
 881                             bTriangle = TRUE;
 882                         }
 883                     }
 884                 }
 885             }
 886         }
 887         if (bTriangle)
 888         {
 889             ncon_triangle++;
 890         }
 891     }
 892
 893     return ncon_triangle;
 894 }
 895
 896 static gmx_bool more_than_two_sequential_constraints(const t_ilist  *ilist,
 897                                                      const t_blocka *at2con)
 898 {
 899     t_iatom  *ia1, *ia2, *iap;
 900     int       ncon1, ncon_tot, c;
 901     int       a1, a2;
 902     gmx_bool  bMoreThanTwoSequentialConstraints;
 903
 904     ncon1    = ilist[F_CONSTR].nr/3;
 905     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 906
 907     ia1 = ilist[F_CONSTR].iatoms;
 908     ia2 = ilist[F_CONSTRNC].iatoms;
 909
 910     bMoreThanTwoSequentialConstraints = FALSE;
 911     for (c = 0; c < ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
 912     {
 913         iap = constr_iatomptr(ncon1, ia1, ia2, c);
 914         a1  = iap[1];
 915         a2  = iap[2];
 916         /* Check if this constraint has constraints connected at both atoms */
 917         if (at2con->index[a1+1] - at2con->index[a1] > 1 &&
 918             at2con->index[a2+1] - at2con->index[a2] > 1)
 919         {
 920             bMoreThanTwoSequentialConstraints = TRUE;
 921         }
 922     }
 923
 924     return bMoreThanTwoSequentialConstraints;
 925 }
 926
 927 static int int_comp(const void *a, const void *b)
 928 {
 929     return (*(int *)a) - (*(int *)b);
 930 }
 931
 932 gmx_lincsdata_t init_lincs(FILE *fplog, gmx_mtop_t *mtop,
 933                            int nflexcon_global, t_blocka *at2con,
 934                            gmx_bool bPLINCS, int nIter, int nProjOrder)
 935 {
 936     struct gmx_lincsdata *li;
 937     int                   mb;
 938     gmx_moltype_t        *molt;
 939
 940     if (fplog)
 941     {
 942         fprintf(fplog, "\nInitializing%s LINear Constraint Solver\n",
 943                 bPLINCS ? " Parallel" : "");
 944     }
 945
 946     snew(li, 1);
 947
 948     li->ncg      =
 949         gmx_mtop_ftype_count(mtop, F_CONSTR) +
 950         gmx_mtop_ftype_count(mtop, F_CONSTRNC);
 951     li->ncg_flex = nflexcon_global;
 952
 953     li->nIter  = nIter;
 954     li->nOrder = nProjOrder;
 955
 956     li->ncg_triangle = 0;
 957     li->bCommIter    = FALSE;
 958     for (mb = 0; mb < mtop->nmolblock; mb++)
 959     {
 960         molt              = &mtop->moltype[mtop->molblock[mb].type];
 961         li->ncg_triangle +=
 962             mtop->molblock[mb].nmol*
 963             count_triangle_constraints(molt->ilist,
 964                                        &at2con[mtop->molblock[mb].type]);
 965         if (bPLINCS && li->bCommIter == FALSE)
 966         {
 967             /* Check if we need to communicate not only before LINCS,
 968              * but also before each iteration.
 969              * The check for only two sequential constraints is only
 970              * useful for the common case of H-bond only constraints.
 971              * With more effort we could also make it useful for small
 972              * molecules with nr. sequential constraints <= nOrder-1.
 973              */
 974             li->bCommIter = (li->nOrder < 1 || more_than_two_sequential_constraints(molt->ilist, &at2con[mtop->molblock[mb].type]));
 975         }
 976     }
 977     if (debug && bPLINCS)
 978     {
 979         fprintf(debug, "PLINCS communication before each iteration: %d\n",
 980                 li->bCommIter);
 981     }
 982
 983     /* LINCS can run on any number of threads.
 984      * Currently the number is fixed for the whole simulation,
 985      * but it could be set in set_lincs().
 986      */
 987     li->nth = gmx_omp_nthreads_get(emntLINCS);
 988     if (li->nth == 1)
 989     {
 990         snew(li->th, 1);
 991     }
 992     else
 993     {
 994         /* Allocate an extra elements for "thread-overlap" constraints */
 995         snew(li->th, li->nth+1);
 996     }
 997     if (debug)
 998     {
 999         fprintf(debug, "LINCS: using %d threads\n", li->nth);
1000     }
1001
1002     if (bPLINCS || li->ncg_triangle > 0)
1003     {
1004         please_cite(fplog, "Hess2008a");
1005     }
1006     else
1007     {
1008         please_cite(fplog, "Hess97a");
1009     }
1010
1011     if (fplog)
1012     {
1013         fprintf(fplog, "The number of constraints is %d\n", li->ncg);
1014         if (bPLINCS)
1015         {
1016             fprintf(fplog, "There are inter charge-group constraints,\n"
1017                     "will communicate selected coordinates each lincs iteration\n");
1018         }
1019         if (li->ncg_triangle > 0)
1020         {
1021             fprintf(fplog,
1022                     "%d constraints are involved in constraint triangles,\n"
1023                     "will apply an additional matrix expansion of order %d for couplings\n"
1024                     "between constraints inside triangles\n",
1025                     li->ncg_triangle, li->nOrder);
1026         }
1027     }
1028
1029     return li;
1030 }
1031
1032 /* Sets up the work division over the threads */
1033 static void lincs_thread_setup(struct gmx_lincsdata *li, int natoms)
1034 {
1035     lincs_thread_t *li_m;
1036     int             th;
1037     unsigned       *atf;
1038     int             a;
1039
1040     if (natoms > li->atf_nalloc)
1041     {
1042         li->atf_nalloc = over_alloc_large(natoms);
1043         srenew(li->atf, li->atf_nalloc);
1044     }
1045
1046     atf = li->atf;
1047     /* Clear the atom flags */
1048     for (a = 0; a < natoms; a++)
1049     {
1050         atf[a] = 0;
1051     }
1052
1053     for (th = 0; th < li->nth; th++)
1054     {
1055         lincs_thread_t *li_th;
1056         int             b;
1057
1058         li_th = &li->th[th];
1059
1060         /* The constraints are divided equally over the threads */
1061         li_th->b0 = (li->nc* th   )/li->nth;
1062         li_th->b1 = (li->nc*(th+1))/li->nth;
1063
1064         if (th < sizeof(*atf)*8)
1065         {
1066             /* For each atom set a flag for constraints from each */
1067             for (b = li_th->b0; b < li_th->b1; b++)
1068             {
1069                 atf[li->bla[b*2]  ] |= (1U<<th);
1070                 atf[li->bla[b*2+1]] |= (1U<<th);
1071             }
1072         }
1073     }
1074
1075 #pragma omp parallel for num_threads(li->nth) schedule(static)
1076     for (th = 0; th < li->nth; th++)
1077     {
1078         lincs_thread_t *li_th;
1079         unsigned        mask;
1080         int             b;
1081
1082         li_th = &li->th[th];
1083
1084         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1085         {
1086             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1087             srenew(li_th->ind, li_th->ind_nalloc);
1088             srenew(li_th->ind_r, li_th->ind_nalloc);
1089         }
1090
1091         if (th < sizeof(*atf)*8)
1092         {
1093             mask = (1U<<th) - 1U;
1094
1095             li_th->nind   = 0;
1096             li_th->nind_r = 0;
1097             for (b = li_th->b0; b < li_th->b1; b++)
1098             {
1099                 /* We let the constraint with the lowest thread index
1100                  * operate on atoms with constraints from multiple threads.
1101                  */
1102                 if (((atf[li->bla[b*2]]   & mask) == 0) &&
1103                     ((atf[li->bla[b*2+1]] & mask) == 0))
1104                 {
1105                     /* Add the constraint to the local atom update index */
1106                     li_th->ind[li_th->nind++] = b;
1107                 }
1108                 else
1109                 {
1110                     /* Add the constraint to the rest block */
1111                     li_th->ind_r[li_th->nind_r++] = b;
1112                 }
1113             }
1114         }
1115         else
1116         {
1117             /* We are out of bits, assign all constraints to rest */
1118             for (b = li_th->b0; b < li_th->b1; b++)
1119             {
1120                 li_th->ind_r[li_th->nind_r++] = b;
1121             }
1122         }
1123     }
1124
1125     /* We need to copy all constraints which have not be assigned
1126      * to a thread to a separate list which will be handled by one thread.
1127      */
1128     li_m = &li->th[li->nth];
1129
1130     li_m->nind = 0;
1131     for (th = 0; th < li->nth; th++)
1132     {
1133         lincs_thread_t *li_th;
1134         int             b;
1135
1136         li_th   = &li->th[th];
1137
1138         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1139         {
1140             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1141             srenew(li_m->ind, li_m->ind_nalloc);
1142         }
1143
1144         for (b = 0; b < li_th->nind_r; b++)
1145         {
1146             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1147         }
1148
1149         if (debug)
1150         {
1151             fprintf(debug, "LINCS thread %d: %d constraints\n",
1152                     th, li_th->nind);
1153         }
1154     }
1155
1156     if (debug)
1157     {
1158         fprintf(debug, "LINCS thread r: %d constraints\n",
1159                 li_m->nind);
1160     }
1161 }
1162
1163
1164 void set_lincs(t_idef *idef, t_mdatoms *md,
1165                gmx_bool bDynamics, t_commrec *cr,
1166                struct gmx_lincsdata *li)
1167 {
1168     int          start, natoms, nflexcon;
1169     t_blocka     at2con;
1170     t_iatom     *iatom;
1171     int          i, k, ncc_alloc, ni, con, nconnect, concon;
1172     int          type, a1, a2;
1173     real         lenA = 0, lenB;
1174     gmx_bool     bLocal;
1175
1176     li->nc  = 0;
1177     li->ncc = 0;
1178     /* Zero the thread index ranges.
1179      * Otherwise without local constraints we could return with old ranges.
1180      */
1181     for (i = 0; i < li->nth; i++)
1182     {
1183         li->th[i].b0   = 0;
1184         li->th[i].b1   = 0;
1185         li->th[i].nind = 0;
1186     }
1187     if (li->nth > 1)
1188     {
1189         li->th[li->nth].nind = 0;
1190     }
1191
1192     /* This is the local topology, so there are only F_CONSTR constraints */
1193     if (idef->il[F_CONSTR].nr == 0)
1194     {
1195         /* There are no constraints,
1196          * we do not need to fill any data structures.
1197          */
1198         return;
1199     }
1200
1201     if (debug)
1202     {
1203         fprintf(debug, "Building the LINCS connectivity\n");
1204     }
1205
1206     if (DOMAINDECOMP(cr))
1207     {
1208         if (cr->dd->constraints)
1209         {
1210             dd_get_constraint_range(cr->dd, &start, &natoms);
1211         }
1212         else
1213         {
1214             natoms = cr->dd->nat_home;
1215         }
1216         start = 0;
1217     }
1218     else
1219     {
1220         start  = 0;
1221         natoms = md->homenr;
1222     }
1223     at2con = make_at2con(start, natoms, idef->il, idef->iparams, bDynamics,
1224                          &nflexcon);
1225
1226
1227     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1228     {
1229         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1230         srenew(li->bllen0, li->nc_alloc);
1231         srenew(li->ddist, li->nc_alloc);
1232         srenew(li->bla, 2*li->nc_alloc);
1233         srenew(li->blc, li->nc_alloc);
1234         srenew(li->blc1, li->nc_alloc);
1235         srenew(li->blnr, li->nc_alloc+1);
1236         srenew(li->bllen, li->nc_alloc);
1237         srenew(li->tmpv, li->nc_alloc);
1238         srenew(li->tmp1, li->nc_alloc);
1239         srenew(li->tmp2, li->nc_alloc);
1240         srenew(li->tmp3, li->nc_alloc);
1241         srenew(li->tmp4, li->nc_alloc);
1242         srenew(li->mlambda, li->nc_alloc);
1243         if (li->ncg_triangle > 0)
1244         {
1245             /* This is allocating too much, but it is difficult to improve */
1246             srenew(li->triangle, li->nc_alloc);
1247             srenew(li->tri_bits, li->nc_alloc);
1248         }
1249     }
1250
1251     iatom = idef->il[F_CONSTR].iatoms;
1252
1253     ncc_alloc   = li->ncc_alloc;
1254     li->blnr[0] = 0;
1255
1256     ni = idef->il[F_CONSTR].nr/3;
1257
1258     con           = 0;
1259     nconnect      = 0;
1260     li->blnr[con] = nconnect;
1261     for (i = 0; i < ni; i++)
1262     {
1263         bLocal = TRUE;
1264         type   = iatom[3*i];
1265         a1     = iatom[3*i+1];
1266         a2     = iatom[3*i+2];
1267         lenA   = idef->iparams[type].constr.dA;
1268         lenB   = idef->iparams[type].constr.dB;
1269         /* Skip the flexible constraints when not doing dynamics */
1270         if (bDynamics || lenA != 0 || lenB != 0)
1271         {
1272             li->bllen0[con]  = lenA;
1273             li->ddist[con]   = lenB - lenA;
1274             /* Set the length to the topology A length */
1275             li->bllen[con]   = li->bllen0[con];
1276             li->bla[2*con]   = a1;
1277             li->bla[2*con+1] = a2;
1278             /* Construct the constraint connection matrix blbnb */
1279             for (k = at2con.index[a1-start]; k < at2con.index[a1-start+1]; k++)
1280             {
1281                 concon = at2con.a[k];
1282                 if (concon != i)
1283                 {
1284                     if (nconnect >= ncc_alloc)
1285                     {
1286                         ncc_alloc = over_alloc_small(nconnect+1);
1287                         srenew(li->blbnb, ncc_alloc);
1288                     }
1289                     li->blbnb[nconnect++] = concon;
1290                 }
1291             }
1292             for (k = at2con.index[a2-start]; k < at2con.index[a2-start+1]; k++)
1293             {
1294                 concon = at2con.a[k];
1295                 if (concon != i)
1296                 {
1297                     if (nconnect+1 > ncc_alloc)
1298                     {
1299                         ncc_alloc = over_alloc_small(nconnect+1);
1300                         srenew(li->blbnb, ncc_alloc);
1301                     }
1302                     li->blbnb[nconnect++] = concon;
1303                 }
1304             }
1305             li->blnr[con+1] = nconnect;
1306
1307             if (cr->dd == NULL)
1308             {
1309                 /* Order the blbnb matrix to optimize memory access */
1310                 qsort(&(li->blbnb[li->blnr[con]]), li->blnr[con+1]-li->blnr[con],
1311                       sizeof(li->blbnb[0]), int_comp);
1312             }
1313             /* Increase the constraint count */
1314             con++;
1315         }
1316     }
1317
1318     done_blocka(&at2con);
1319
1320     /* This is the real number of constraints,
1321      * without dynamics the flexible constraints are not present.
1322      */
1323     li->nc = con;
1324
1325     li->ncc = li->blnr[con];
1326     if (cr->dd == NULL)
1327     {
1328         /* Since the matrix is static, we can free some memory */
1329         ncc_alloc = li->ncc;
1330         srenew(li->blbnb, ncc_alloc);
1331     }
1332
1333     if (ncc_alloc > li->ncc_alloc)
1334     {
1335         li->ncc_alloc = ncc_alloc;
1336         srenew(li->blmf, li->ncc_alloc);
1337         srenew(li->blmf1, li->ncc_alloc);
1338         srenew(li->tmpncc, li->ncc_alloc);
1339     }
1340
1341     if (debug)
1342     {
1343         fprintf(debug, "Number of constraints is %d, couplings %d\n",
1344                 li->nc, li->ncc);
1345     }
1346
1347     if (li->nth == 1)
1348     {
1349         li->th[0].b0 = 0;
1350         li->th[0].b1 = li->nc;
1351     }
1352     else
1353     {
1354         lincs_thread_setup(li, md->nr);
1355     }
1356
1357     set_lincs_matrix(li, md->invmass, md->lambda);
1358 }
1359
1360 static void lincs_warning(FILE *fplog,
1361                           gmx_domdec_t *dd, rvec *x, rvec *xprime, t_pbc *pbc,
1362                           int ncons, int *bla, real *bllen, real wangle,
1363                           int maxwarn, int *warncount)
1364 {
1365     int  b, i, j;
1366     rvec v0, v1;
1367     real wfac, d0, d1, cosine;
1368     char buf[STRLEN];
1369
1370     wfac = cos(DEG2RAD*wangle);
1371
1372     sprintf(buf, "bonds that rotated more than %g degrees:\n"
1373             " atom 1 atom 2  angle  previous, current, constraint length\n",
1374             wangle);
1375     fprintf(stderr, "%s", buf);
1376     if (fplog)
1377     {
1378         fprintf(fplog, "%s", buf);
1379     }
1380
1381     for (b = 0; b < ncons; b++)
1382     {
1383         i = bla[2*b];
1384         j = bla[2*b+1];
1385         if (pbc)
1386         {
1387             pbc_dx_aiuc(pbc, x[i], x[j], v0);
1388             pbc_dx_aiuc(pbc, xprime[i], xprime[j], v1);
1389         }
1390         else
1391         {
1392             rvec_sub(x[i], x[j], v0);
1393             rvec_sub(xprime[i], xprime[j], v1);
1394         }
1395         d0     = norm(v0);
1396         d1     = norm(v1);
1397         cosine = iprod(v0, v1)/(d0*d1);
1398         if (cosine < wfac)
1399         {
1400             sprintf(buf, " %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1401                     ddglatnr(dd, i), ddglatnr(dd, j),
1402                     RAD2DEG*acos(cosine), d0, d1, bllen[b]);
1403             fprintf(stderr, "%s", buf);
1404             if (fplog)
1405             {
1406                 fprintf(fplog, "%s", buf);
1407             }
1408             if (!gmx_isfinite(d1))
1409             {
1410                 gmx_fatal(FARGS, "Bond length not finite.");
1411             }
1412
1413             (*warncount)++;
1414         }
1415     }
1416     if (*warncount > maxwarn)
1417     {
1418         too_many_constraint_warnings(econtLINCS, *warncount);
1419     }
1420 }
1421
1422 static void cconerr(gmx_domdec_t *dd,
1423                     int ncons, int *bla, real *bllen, rvec *x, t_pbc *pbc,
1424                     real *ncons_loc, real *ssd, real *max, int *imax)
1425 {
1426     real       len, d, ma, ssd2, r2;
1427     int       *nlocat, count, b, im;
1428     rvec       dx;
1429
1430     if (dd && dd->constraints)
1431     {
1432         nlocat = dd_constraints_nlocalatoms(dd);
1433     }
1434     else
1435     {
1436         nlocat = 0;
1437     }
1438
1439     ma    = 0;
1440     ssd2  = 0;
1441     im    = 0;
1442     count = 0;
1443     for (b = 0; b < ncons; b++)
1444     {
1445         if (pbc)
1446         {
1447             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
1448         }
1449         else
1450         {
1451             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
1452         }
1453         r2  = norm2(dx);
1454         len = r2*gmx_invsqrt(r2);
1455         d   = fabs(len/bllen[b]-1);
1456         if (d > ma && (nlocat == NULL || nlocat[b]))
1457         {
1458             ma = d;
1459             im = b;
1460         }
1461         if (nlocat == NULL)
1462         {
1463             ssd2 += d*d;
1464             count++;
1465         }
1466         else
1467         {
1468             ssd2  += nlocat[b]*d*d;
1469             count += nlocat[b];
1470         }
1471     }
1472
1473     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1474     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1475     *max       = ma;
1476     *imax      = im;
1477 }
1478
1479 gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner,
1480                          t_inputrec *ir,
1481                          gmx_int64_t step,
1482                          struct gmx_lincsdata *lincsd, t_mdatoms *md,
1483                          t_commrec *cr,
1484                          rvec *x, rvec *xprime, rvec *min_proj,
1485                          matrix box, t_pbc *pbc,
1486                          real lambda, real *dvdlambda,
1487                          real invdt, rvec *v,
1488                          gmx_bool bCalcVir, tensor vir_r_m_dr,
1489                          int econq,
1490                          t_nrnb *nrnb,
1491                          int maxwarn, int *warncount)
1492 {
1493     gmx_bool  bCalcDHDL;
1494     char      buf[STRLEN], buf2[22], buf3[STRLEN];
1495     int       i, warn, p_imax, error;
1496     real      ncons_loc, p_ssd, p_max = 0;
1497     rvec      dx;
1498     gmx_bool  bOK;
1499
1500     bOK = TRUE;
1501
1502     /* This boolean should be set by a flag passed to this routine.
1503      * We can also easily check if any constraint length is changed,
1504      * if not dH/dlambda=0 and we can also set the boolean to FALSE.
1505      */
1506     bCalcDHDL = (ir->efep != efepNO && dvdlambda != NULL);
1507
1508     if (lincsd->nc == 0 && cr->dd == NULL)
1509     {
1510         if (bLog || bEner)
1511         {
1512             lincsd->rmsd_data[0] = 0;
1513             if (ir->eI == eiSD2 && v == NULL)
1514             {
1515                 i = 2;
1516             }
1517             else
1518             {
1519                 i = 1;
1520             }
1521             lincsd->rmsd_data[i] = 0;
1522         }
1523
1524         return bOK;
1525     }
1526
1527     if (econq == econqCoord)
1528     {
1529         /* We can't use bCalcDHDL here, since NULL can be passed for dvdlambda
1530          * also with efep!=fepNO.
1531          */
1532         if (ir->efep != efepNO)
1533         {
1534             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1535             {
1536                 set_lincs_matrix(lincsd, md->invmass, md->lambda);
1537             }
1538
1539             for (i = 0; i < lincsd->nc; i++)
1540             {
1541                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1542             }
1543         }
1544
1545         if (lincsd->ncg_flex)
1546         {
1547             /* Set the flexible constraint lengths to the old lengths */
1548             if (pbc != NULL)
1549             {
1550                 for (i = 0; i < lincsd->nc; i++)
1551                 {
1552                     if (lincsd->bllen[i] == 0)
1553                     {
1554                         pbc_dx_aiuc(pbc, x[lincsd->bla[2*i]], x[lincsd->bla[2*i+1]], dx);
1555                         lincsd->bllen[i] = norm(dx);
1556                     }
1557                 }
1558             }
1559             else
1560             {
1561                 for (i = 0; i < lincsd->nc; i++)
1562                 {
1563                     if (lincsd->bllen[i] == 0)
1564                     {
1565                         lincsd->bllen[i] =
1566                             sqrt(distance2(x[lincsd->bla[2*i]],
1567                                            x[lincsd->bla[2*i+1]]));
1568                     }
1569                 }
1570             }
1571         }
1572
1573         if (bLog && fplog)
1574         {
1575             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1576                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1577         }
1578
1579         /* This warn var can be updated by multiple threads
1580          * at the same time. But as we only need to detect
1581          * if a warning occured or not, this is not an issue.
1582          */
1583         warn = -1;
1584
1585         /* The OpenMP parallel region of constrain_lincs for coords */
1586 #pragma omp parallel num_threads(lincsd->nth)
1587         {
1588             int th = gmx_omp_get_thread_num();
1589
1590             clear_mat(lincsd->th[th].vir_r_m_dr);
1591
1592             do_lincs(x, xprime, box, pbc, lincsd, th,
1593                      md->invmass, cr,
1594                      bCalcDHDL,
1595                      ir->LincsWarnAngle, &warn,
1596                      invdt, v, bCalcVir,
1597                      th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1598         }
1599
1600         if (bLog && fplog && lincsd->nc > 0)
1601         {
1602             fprintf(fplog, "   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1603             fprintf(fplog, "       Before LINCS          %.6f    %.6f %6d %6d\n",
1604                     sqrt(p_ssd/ncons_loc), p_max,
1605                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1606                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1607         }
1608         if (bLog || bEner)
1609         {
1610             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1611                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1612             /* Check if we are doing the second part of SD */
1613             if (ir->eI == eiSD2 && v == NULL)
1614             {
1615                 i = 2;
1616             }
1617             else
1618             {
1619                 i = 1;
1620             }
1621             lincsd->rmsd_data[0] = ncons_loc;
1622             lincsd->rmsd_data[i] = p_ssd;
1623         }
1624         else
1625         {
1626             lincsd->rmsd_data[0] = 0;
1627             lincsd->rmsd_data[1] = 0;
1628             lincsd->rmsd_data[2] = 0;
1629         }
1630         if (bLog && fplog && lincsd->nc > 0)
1631         {
1632             fprintf(fplog,
1633                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1634                     sqrt(p_ssd/ncons_loc), p_max,
1635                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1636                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1637         }
1638
1639         if (warn >= 0)
1640         {
1641             if (maxwarn >= 0)
1642             {
1643                 cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1644                         &ncons_loc, &p_ssd, &p_max, &p_imax);
1645                 if (MULTISIM(cr))
1646                 {
1647                     sprintf(buf3, " in simulation %d", cr->ms->sim);
1648                 }
1649                 else
1650                 {
1651                     buf3[0] = 0;
1652                 }
1653                 sprintf(buf, "\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1654                         "relative constraint deviation after LINCS:\n"
1655                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1656                         gmx_step_str(step, buf2), ir->init_t+step*ir->delta_t,
1657                         buf3,
1658                         sqrt(p_ssd/ncons_loc), p_max,
1659                         ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1660                         ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1661                 if (fplog)
1662                 {
1663                     fprintf(fplog, "%s", buf);
1664                 }
1665                 fprintf(stderr, "%s", buf);
1666                 lincs_warning(fplog, cr->dd, x, xprime, pbc,
1667                               lincsd->nc, lincsd->bla, lincsd->bllen,
1668                               ir->LincsWarnAngle, maxwarn, warncount);
1669             }
1670             bOK = (p_max < 0.5);
1671         }
1672
1673         if (lincsd->ncg_flex)
1674         {
1675             for (i = 0; (i < lincsd->nc); i++)
1676             {
1677                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1678                 {
1679                     lincsd->bllen[i] = 0;
1680                 }
1681             }
1682         }
1683     }
1684     else
1685     {
1686         /* The OpenMP parallel region of constrain_lincs for derivatives */
1687 #pragma omp parallel num_threads(lincsd->nth)
1688         {
1689             int th = gmx_omp_get_thread_num();
1690
1691             do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
1692                       md->invmass, econq, bCalcDHDL,
1693                       bCalcVir, th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1694         }
1695     }
1696
1697     if (bCalcDHDL)
1698     {
1699         /* Reduce the dH/dlambda contributions over the threads */
1700         real dhdlambda;
1701         int  th;
1702
1703         dhdlambda = 0;
1704         for (th = 0; th < lincsd->nth; th++)
1705         {
1706             dhdlambda += lincsd->th[th].dhdlambda;
1707         }
1708         if (econqCoord)
1709         {
1710             /* dhdlambda contains dH/dlambda*dt^2, correct for this */
1711             dhdlambda /= ir->delta_t*ir->delta_t;
1712         }
1713         *dvdlambda += dhdlambda;
1714     }
1715
1716     if (bCalcVir && lincsd->nth > 1)
1717     {
1718         for (i = 1; i < lincsd->nth; i++)
1719         {
1720             m_add(vir_r_m_dr, lincsd->th[i].vir_r_m_dr, vir_r_m_dr);
1721         }
1722     }
1723
1724     /* count assuming nit=1 */
1725     inc_nrnb(nrnb, eNR_LINCS, lincsd->nc);
1726     inc_nrnb(nrnb, eNR_LINCSMAT, (2+lincsd->nOrder)*lincsd->ncc);
1727     if (lincsd->ntriangle > 0)
1728     {
1729         inc_nrnb(nrnb, eNR_LINCSMAT, lincsd->nOrder*lincsd->ncc_triangle);
1730     }
1731     if (v)
1732     {
1733         inc_nrnb(nrnb, eNR_CONSTR_V, lincsd->nc*2);
1734     }
1735     if (bCalcVir)
1736     {
1737         inc_nrnb(nrnb, eNR_CONSTR_VIR, lincsd->nc);
1738     }
1739
1740     return bOK;
1741 }