src/gromacs/mdlib/clincs.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #include "gmxpre.h"
  39
  40 #include "config.h"
  41
  42 #include <math.h>
  43 #include <stdlib.h>
  44
  45 #include "gromacs/legacyheaders/types/commrec.h"
  46 #include "gromacs/legacyheaders/constr.h"
  47 #include "gromacs/legacyheaders/copyrite.h"
  48 #include "gromacs/math/units.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/pbcutil/pbc.h"
  51 #include "gromacs/legacyheaders/mdrun.h"
  52 #include "gromacs/legacyheaders/nrnb.h"
  53 #include "gromacs/legacyheaders/domdec.h"
  54 #include "gromacs/topology/mtop_util.h"
  55 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
  56
  57 #include "gromacs/fileio/gmxfio.h"
  58 #include "gromacs/topology/block.h"
  59 #include "gromacs/utility/fatalerror.h"
  60 #include "gromacs/utility/gmxomp.h"
  61 #include "gromacs/utility/smalloc.h"
  62
  63 typedef struct {
  64     int    b0;         /* first constraint for this thread */
  65     int    b1;         /* b1-1 is the last constraint for this thread */
  66     int    nind;       /* number of indices */
  67     int   *ind;        /* constraint index for updating atom data */
  68     int    nind_r;     /* number of indices */
  69     int   *ind_r;      /* constraint index for updating atom data */
  70     int    ind_nalloc; /* allocation size of ind and ind_r */
  71     tensor vir_r_m_dr; /* temporary variable for virial calculation */
  72 } lincs_thread_t;
  73
  74 typedef struct gmx_lincsdata {
  75     int             ncg;          /* the global number of constraints */
  76     int             ncg_flex;     /* the global number of flexible constraints */
  77     int             ncg_triangle; /* the global number of constraints in triangles */
  78     int             nIter;        /* the number of iterations */
  79     int             nOrder;       /* the order of the matrix expansion */
  80     int             nc;           /* the number of constraints */
  81     int             nc_alloc;     /* the number we allocated memory for */
  82     int             ncc;          /* the number of constraint connections */
  83     int             ncc_alloc;    /* the number we allocated memory for */
  84     real            matlam;       /* the FE lambda value used for filling blc and blmf */
  85     real           *bllen0;       /* the reference distance in topology A */
  86     real           *ddist;        /* the reference distance in top B - the r.d. in top A */
  87     int            *bla;          /* the atom pairs involved in the constraints */
  88     real           *blc;          /* 1/sqrt(invmass1 + invmass2) */
  89     real           *blc1;         /* as blc, but with all masses 1 */
  90     int            *blnr;         /* index into blbnb and blmf */
  91     int            *blbnb;        /* list of constraint connections */
  92     int             ntriangle;    /* the local number of constraints in triangles */
  93     int            *triangle;     /* the list of triangle constraints */
  94     int            *tri_bits;     /* the bits tell if the matrix element should be used */
  95     int             ncc_triangle; /* the number of constraint connections in triangles */
  96     gmx_bool        bCommIter;    /* communicate before each LINCS interation */
  97     real           *blmf;         /* matrix of mass factors for constraint connections */
  98     real           *blmf1;        /* as blmf, but with all masses 1 */
  99     real           *bllen;        /* the reference bond length */
 100     int             nth;          /* The number of threads doing LINCS */
 101     lincs_thread_t *th;           /* LINCS thread division */
 102     unsigned       *atf;          /* atom flags for thread parallelization */
 103     int             atf_nalloc;   /* allocation size of atf */
 104     /* arrays for temporary storage in the LINCS algorithm */
 105     rvec           *tmpv;
 106     real           *tmpncc;
 107     real           *tmp1;
 108     real           *tmp2;
 109     real           *tmp3;
 110     real           *tmp4;
 111     real           *mlambda; /* the Lagrange multipliers * -1 */
 112     /* storage for the constraint RMS relative deviation output */
 113     real            rmsd_data[3];
 114 } t_gmx_lincsdata;
 115
 116 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 117 {
 118     return lincsd->rmsd_data;
 119 }
 120
 121 real lincs_rmsd(struct gmx_lincsdata *lincsd, gmx_bool bSD2)
 122 {
 123     if (lincsd->rmsd_data[0] > 0)
 124     {
 125         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 126     }
 127     else
 128     {
 129         return 0;
 130     }
 131 }
 132
 133 /* Do a set of nrec LINCS matrix multiplications.
 134  * This function will return with up to date thread-local
 135  * constraint data, without an OpenMP barrier.
 136  */
 137 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 138                                 int b0, int b1,
 139                                 const real *blcc,
 140                                 real *rhs1, real *rhs2, real *sol)
 141 {
 142     int        nrec, rec, b, j, n, nr0, nr1;
 143     real       mvb, *swap;
 144     int        ntriangle, tb, bits;
 145     const int *blnr     = lincsd->blnr, *blbnb = lincsd->blbnb;
 146     const int *triangle = lincsd->triangle, *tri_bits = lincsd->tri_bits;
 147
 148     ntriangle = lincsd->ntriangle;
 149     nrec      = lincsd->nOrder;
 150
 151     for (rec = 0; rec < nrec; rec++)
 152     {
 153 #pragma omp barrier
 154         for (b = b0; b < b1; b++)
 155         {
 156             mvb = 0;
 157             for (n = blnr[b]; n < blnr[b+1]; n++)
 158             {
 159                 j   = blbnb[n];
 160                 mvb = mvb + blcc[n]*rhs1[j];
 161             }
 162             rhs2[b] = mvb;
 163             sol[b]  = sol[b] + mvb;
 164         }
 165         swap = rhs1;
 166         rhs1 = rhs2;
 167         rhs2 = swap;
 168     } /* nrec*(ncons+2*nrtot) flops */
 169
 170     if (ntriangle > 0)
 171     {
 172         /* Perform an extra nrec recursions for only the constraints
 173          * involved in rigid triangles.
 174          * In this way their accuracy should come close to those of the other
 175          * constraints, since traingles of constraints can produce eigenvalues
 176          * around 0.7, while the effective eigenvalue for bond constraints
 177          * is around 0.4 (and 0.7*0.7=0.5).
 178          */
 179         /* We need to copy the temporary array, since only the elements
 180          * for constraints involved in triangles are updated and then
 181          * the pointers are swapped. This saving copying the whole arrary.
 182          * We need barrier as other threads might still be reading from rhs2.
 183          */
 184 #pragma omp barrier
 185         for (b = b0; b < b1; b++)
 186         {
 187             rhs2[b] = rhs1[b];
 188         }
 189 #pragma omp barrier
 190 #pragma omp master
 191         {
 192             for (rec = 0; rec < nrec; rec++)
 193             {
 194                 for (tb = 0; tb < ntriangle; tb++)
 195                 {
 196                     b    = triangle[tb];
 197                     bits = tri_bits[tb];
 198                     mvb  = 0;
 199                     nr0  = blnr[b];
 200                     nr1  = blnr[b+1];
 201                     for (n = nr0; n < nr1; n++)
 202                     {
 203                         if (bits & (1<<(n-nr0)))
 204                         {
 205                             j   = blbnb[n];
 206                             mvb = mvb + blcc[n]*rhs1[j];
 207                         }
 208                     }
 209                     rhs2[b] = mvb;
 210                     sol[b]  = sol[b] + mvb;
 211                 }
 212                 swap = rhs1;
 213                 rhs1 = rhs2;
 214                 rhs2 = swap;
 215             }
 216         } /* flops count is missing here */
 217
 218         /* We need a barrier here as the calling routine will continue
 219          * to operate on the thread-local constraints without barrier.
 220          */
 221 #pragma omp barrier
 222     }
 223 }
 224
 225 static void lincs_update_atoms_noind(int ncons, const int *bla,
 226                                      real prefac,
 227                                      const real *fac, rvec *r,
 228                                      const real *invmass,
 229                                      rvec *x)
 230 {
 231     int  b, i, j;
 232     real mvb, im1, im2, tmp0, tmp1, tmp2;
 233
 234     if (invmass != NULL)
 235     {
 236         for (b = 0; b < ncons; b++)
 237         {
 238             i        = bla[2*b];
 239             j        = bla[2*b+1];
 240             mvb      = prefac*fac[b];
 241             im1      = invmass[i];
 242             im2      = invmass[j];
 243             tmp0     = r[b][0]*mvb;
 244             tmp1     = r[b][1]*mvb;
 245             tmp2     = r[b][2]*mvb;
 246             x[i][0] -= tmp0*im1;
 247             x[i][1] -= tmp1*im1;
 248             x[i][2] -= tmp2*im1;
 249             x[j][0] += tmp0*im2;
 250             x[j][1] += tmp1*im2;
 251             x[j][2] += tmp2*im2;
 252         } /* 16 ncons flops */
 253     }
 254     else
 255     {
 256         for (b = 0; b < ncons; b++)
 257         {
 258             i        = bla[2*b];
 259             j        = bla[2*b+1];
 260             mvb      = prefac*fac[b];
 261             tmp0     = r[b][0]*mvb;
 262             tmp1     = r[b][1]*mvb;
 263             tmp2     = r[b][2]*mvb;
 264             x[i][0] -= tmp0;
 265             x[i][1] -= tmp1;
 266             x[i][2] -= tmp2;
 267             x[j][0] += tmp0;
 268             x[j][1] += tmp1;
 269             x[j][2] += tmp2;
 270         }
 271     }
 272 }
 273
 274 static void lincs_update_atoms_ind(int ncons, const int *ind, const int *bla,
 275                                    real prefac,
 276                                    const real *fac, rvec *r,
 277                                    const real *invmass,
 278                                    rvec *x)
 279 {
 280     int  bi, b, i, j;
 281     real mvb, im1, im2, tmp0, tmp1, tmp2;
 282
 283     if (invmass != NULL)
 284     {
 285         for (bi = 0; bi < ncons; bi++)
 286         {
 287             b        = ind[bi];
 288             i        = bla[2*b];
 289             j        = bla[2*b+1];
 290             mvb      = prefac*fac[b];
 291             im1      = invmass[i];
 292             im2      = invmass[j];
 293             tmp0     = r[b][0]*mvb;
 294             tmp1     = r[b][1]*mvb;
 295             tmp2     = r[b][2]*mvb;
 296             x[i][0] -= tmp0*im1;
 297             x[i][1] -= tmp1*im1;
 298             x[i][2] -= tmp2*im1;
 299             x[j][0] += tmp0*im2;
 300             x[j][1] += tmp1*im2;
 301             x[j][2] += tmp2*im2;
 302         } /* 16 ncons flops */
 303     }
 304     else
 305     {
 306         for (bi = 0; bi < ncons; bi++)
 307         {
 308             b        = ind[bi];
 309             i        = bla[2*b];
 310             j        = bla[2*b+1];
 311             mvb      = prefac*fac[b];
 312             tmp0     = r[b][0]*mvb;
 313             tmp1     = r[b][1]*mvb;
 314             tmp2     = r[b][2]*mvb;
 315             x[i][0] -= tmp0;
 316             x[i][1] -= tmp1;
 317             x[i][2] -= tmp2;
 318             x[j][0] += tmp0;
 319             x[j][1] += tmp1;
 320             x[j][2] += tmp2;
 321         } /* 16 ncons flops */
 322     }
 323 }
 324
 325 static void lincs_update_atoms(struct gmx_lincsdata *li, int th,
 326                                real prefac,
 327                                const real *fac, rvec *r,
 328                                const real *invmass,
 329                                rvec *x)
 330 {
 331     if (li->nth == 1)
 332     {
 333         /* Single thread, we simply update for all constraints */
 334         lincs_update_atoms_noind(li->nc, li->bla, prefac, fac, r, invmass, x);
 335     }
 336     else
 337     {
 338         /* Update the atom vector components for our thread local
 339          * constraints that only access our local atom range.
 340          * This can be done without a barrier.
 341          */
 342         lincs_update_atoms_ind(li->th[th].nind, li->th[th].ind,
 343                                li->bla, prefac, fac, r, invmass, x);
 344
 345         if (li->th[li->nth].nind > 0)
 346         {
 347             /* Update the constraints that operate on atoms
 348              * in multiple thread atom blocks on the master thread.
 349              */
 350 #pragma omp barrier
 351 #pragma omp master
 352             {
 353                 lincs_update_atoms_ind(li->th[li->nth].nind,
 354                                        li->th[li->nth].ind,
 355                                        li->bla, prefac, fac, r, invmass, x);
 356             }
 357         }
 358     }
 359 }
 360
 361 /* LINCS projection, works on derivatives of the coordinates */
 362 static void do_lincsp(rvec *x, rvec *f, rvec *fp, t_pbc *pbc,
 363                       struct gmx_lincsdata *lincsd, int th,
 364                       real *invmass,
 365                       int econq, real *dvdlambda,
 366                       gmx_bool bCalcVir, tensor rmdf)
 367 {
 368     int      b0, b1, b, i, j, k, n;
 369     real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, wfac, lam;
 370     rvec     dx;
 371     int     *bla, *blnr, *blbnb;
 372     rvec    *r;
 373     real    *blc, *blmf, *blcc, *rhs1, *rhs2, *sol;
 374
 375     b0 = lincsd->th[th].b0;
 376     b1 = lincsd->th[th].b1;
 377
 378     bla    = lincsd->bla;
 379     r      = lincsd->tmpv;
 380     blnr   = lincsd->blnr;
 381     blbnb  = lincsd->blbnb;
 382     if (econq != econqForce)
 383     {
 384         /* Use mass-weighted parameters */
 385         blc  = lincsd->blc;
 386         blmf = lincsd->blmf;
 387     }
 388     else
 389     {
 390         /* Use non mass-weighted parameters */
 391         blc  = lincsd->blc1;
 392         blmf = lincsd->blmf1;
 393     }
 394     blcc   = lincsd->tmpncc;
 395     rhs1   = lincsd->tmp1;
 396     rhs2   = lincsd->tmp2;
 397     sol    = lincsd->tmp3;
 398
 399     /* Compute normalized i-j vectors */
 400     if (pbc)
 401     {
 402         for (b = b0; b < b1; b++)
 403         {
 404             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 405             unitv(dx, r[b]);
 406         }
 407     }
 408     else
 409     {
 410         for (b = b0; b < b1; b++)
 411         {
 412             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
 413             unitv(dx, r[b]);
 414         } /* 16 ncons flops */
 415     }
 416
 417 #pragma omp barrier
 418     for (b = b0; b < b1; b++)
 419     {
 420         tmp0 = r[b][0];
 421         tmp1 = r[b][1];
 422         tmp2 = r[b][2];
 423         i    = bla[2*b];
 424         j    = bla[2*b+1];
 425         for (n = blnr[b]; n < blnr[b+1]; n++)
 426         {
 427             k       = blbnb[n];
 428             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 429         } /* 6 nr flops */
 430         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 431                       tmp1*(f[i][1] - f[j][1]) +
 432                       tmp2*(f[i][2] - f[j][2]));
 433         rhs1[b] = mvb;
 434         sol[b]  = mvb;
 435         /* 7 flops */
 436     }
 437     /* Together: 23*ncons + 6*nrtot flops */
 438
 439     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 440     /* nrec*(ncons+2*nrtot) flops */
 441
 442     if (econq == econqDeriv_FlexCon)
 443     {
 444         /* We only want to constraint the flexible constraints,
 445          * so we mask out the normal ones by setting sol to 0.
 446          */
 447         for (b = b0; b < b1; b++)
 448         {
 449             if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 450             {
 451                 sol[b] = 0;
 452             }
 453         }
 454     }
 455
 456     /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
 457     for (b = b0; b < b1; b++)
 458     {
 459         sol[b] *= blc[b];
 460     }
 461
 462     /* When constraining forces, we should not use mass weighting,
 463      * so we pass invmass=NULL, which results in the use of 1 for all atoms.
 464      */
 465     lincs_update_atoms(lincsd, th, 1.0, sol, r,
 466                        (econq != econqForce) ? invmass : NULL, fp);
 467
 468     if (dvdlambda != NULL)
 469     {
 470 #pragma omp barrier
 471         for (b = b0; b < b1; b++)
 472         {
 473             *dvdlambda -= sol[b]*lincsd->ddist[b];
 474         }
 475         /* 10 ncons flops */
 476     }
 477
 478     if (bCalcVir)
 479     {
 480         /* Constraint virial,
 481          * determines sum r_bond x delta f,
 482          * where delta f is the constraint correction
 483          * of the quantity that is being constrained.
 484          */
 485         for (b = b0; b < b1; b++)
 486         {
 487             mvb = lincsd->bllen[b]*sol[b];
 488             for (i = 0; i < DIM; i++)
 489             {
 490                 tmp1 = mvb*r[b][i];
 491                 for (j = 0; j < DIM; j++)
 492                 {
 493                     rmdf[i][j] += tmp1*r[b][j];
 494                 }
 495             }
 496         } /* 23 ncons flops */
 497     }
 498 }
 499
 500 static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
 501                      struct gmx_lincsdata *lincsd, int th,
 502                      real *invmass,
 503                      t_commrec *cr,
 504                      gmx_bool bCalcLambda,
 505                      real wangle, int *warn,
 506                      real invdt, rvec *v,
 507                      gmx_bool bCalcVir, tensor vir_r_m_dr)
 508 {
 509     int      b0, b1, b, i, j, k, n, iter;
 510     real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, len2, dlen2, wfac;
 511     rvec     dx;
 512     int     *bla, *blnr, *blbnb;
 513     rvec    *r;
 514     real    *blc, *blmf, *bllen, *blcc, *rhs1, *rhs2, *sol, *blc_sol, *mlambda;
 515     int     *nlocat;
 516
 517     b0 = lincsd->th[th].b0;
 518     b1 = lincsd->th[th].b1;
 519
 520     bla     = lincsd->bla;
 521     r       = lincsd->tmpv;
 522     blnr    = lincsd->blnr;
 523     blbnb   = lincsd->blbnb;
 524     blc     = lincsd->blc;
 525     blmf    = lincsd->blmf;
 526     bllen   = lincsd->bllen;
 527     blcc    = lincsd->tmpncc;
 528     rhs1    = lincsd->tmp1;
 529     rhs2    = lincsd->tmp2;
 530     sol     = lincsd->tmp3;
 531     blc_sol = lincsd->tmp4;
 532     mlambda = lincsd->mlambda;
 533
 534     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 535     {
 536         nlocat = dd_constraints_nlocalatoms(cr->dd);
 537     }
 538     else
 539     {
 540         nlocat = NULL;
 541     }
 542
 543     if (pbc)
 544     {
 545         /* Compute normalized i-j vectors */
 546         for (b = b0; b < b1; b++)
 547         {
 548             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 549             unitv(dx, r[b]);
 550         }
 551 #pragma omp barrier
 552         for (b = b0; b < b1; b++)
 553         {
 554             for (n = blnr[b]; n < blnr[b+1]; n++)
 555             {
 556                 blcc[n] = blmf[n]*iprod(r[b], r[blbnb[n]]);
 557             }
 558             pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 559             mvb     = blc[b]*(iprod(r[b], dx) - bllen[b]);
 560             rhs1[b] = mvb;
 561             sol[b]  = mvb;
 562         }
 563     }
 564     else
 565     {
 566         /* Compute normalized i-j vectors */
 567         for (b = b0; b < b1; b++)
 568         {
 569             i       = bla[2*b];
 570             j       = bla[2*b+1];
 571             tmp0    = x[i][0] - x[j][0];
 572             tmp1    = x[i][1] - x[j][1];
 573             tmp2    = x[i][2] - x[j][2];
 574             rlen    = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 575             r[b][0] = rlen*tmp0;
 576             r[b][1] = rlen*tmp1;
 577             r[b][2] = rlen*tmp2;
 578         } /* 16 ncons flops */
 579
 580 #pragma omp barrier
 581         for (b = b0; b < b1; b++)
 582         {
 583             tmp0 = r[b][0];
 584             tmp1 = r[b][1];
 585             tmp2 = r[b][2];
 586             len  = bllen[b];
 587             i    = bla[2*b];
 588             j    = bla[2*b+1];
 589             for (n = blnr[b]; n < blnr[b+1]; n++)
 590             {
 591                 k       = blbnb[n];
 592                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 593             } /* 6 nr flops */
 594             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 595                           tmp1*(xp[i][1] - xp[j][1]) +
 596                           tmp2*(xp[i][2] - xp[j][2]) - len);
 597             rhs1[b] = mvb;
 598             sol[b]  = mvb;
 599             /* 10 flops */
 600         }
 601         /* Together: 26*ncons + 6*nrtot flops */
 602     }
 603
 604     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 605     /* nrec*(ncons+2*nrtot) flops */
 606
 607     for (b = b0; b < b1; b++)
 608     {
 609         mlambda[b] = blc[b]*sol[b];
 610     }
 611
 612     /* Update the coordinates */
 613     lincs_update_atoms(lincsd, th, 1.0, mlambda, r, invmass, xp);
 614
 615     /*
 616      ********  Correction for centripetal effects  ********
 617      */
 618
 619     wfac = cos(DEG2RAD*wangle);
 620     wfac = wfac*wfac;
 621
 622     for (iter = 0; iter < lincsd->nIter; iter++)
 623     {
 624         if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints))
 625         {
 626 #pragma omp barrier
 627 #pragma omp master
 628             {
 629                 /* Communicate the corrected non-local coordinates */
 630                 if (DOMAINDECOMP(cr))
 631                 {
 632                     dd_move_x_constraints(cr->dd, box, xp, NULL, FALSE);
 633                 }
 634             }
 635         }
 636
 637 #pragma omp barrier
 638         for (b = b0; b < b1; b++)
 639         {
 640             len = bllen[b];
 641             if (pbc)
 642             {
 643                 pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 644             }
 645             else
 646             {
 647                 rvec_sub(xp[bla[2*b]], xp[bla[2*b+1]], dx);
 648             }
 649             len2  = len*len;
 650             dlen2 = 2*len2 - norm2(dx);
 651             if (dlen2 < wfac*len2 && (nlocat == NULL || nlocat[b]))
 652             {
 653                 /* not race free - see detailed comment in caller */
 654                 *warn = b;
 655             }
 656             if (dlen2 > 0)
 657             {
 658                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 659             }
 660             else
 661             {
 662                 mvb = blc[b]*len;
 663             }
 664             rhs1[b] = mvb;
 665             sol[b]  = mvb;
 666         } /* 20*ncons flops */
 667
 668         lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 669         /* nrec*(ncons+2*nrtot) flops */
 670
 671         for (b = b0; b < b1; b++)
 672         {
 673             mvb         = blc[b]*sol[b];
 674             blc_sol[b]  = mvb;
 675             mlambda[b] += mvb;
 676         }
 677
 678         /* Update the coordinates */
 679         lincs_update_atoms(lincsd, th, 1.0, blc_sol, r, invmass, xp);
 680     }
 681     /* nit*ncons*(37+9*nrec) flops */
 682
 683     if (v != NULL)
 684     {
 685         /* Update the velocities */
 686         lincs_update_atoms(lincsd, th, invdt, mlambda, r, invmass, v);
 687         /* 16 ncons flops */
 688     }
 689
 690     if (nlocat != NULL && bCalcLambda)
 691     {
 692         /* In lincs_update_atoms thread might cross-read mlambda */
 693 #pragma omp barrier
 694
 695         /* Only account for local atoms */
 696         for (b = b0; b < b1; b++)
 697         {
 698             mlambda[b] *= 0.5*nlocat[b];
 699         }
 700     }
 701
 702     if (bCalcVir)
 703     {
 704         /* Constraint virial */
 705         for (b = b0; b < b1; b++)
 706         {
 707             tmp0 = -bllen[b]*mlambda[b];
 708             for (i = 0; i < DIM; i++)
 709             {
 710                 tmp1 = tmp0*r[b][i];
 711                 for (j = 0; j < DIM; j++)
 712                 {
 713                     vir_r_m_dr[i][j] -= tmp1*r[b][j];
 714                 }
 715             }
 716         } /* 22 ncons flops */
 717     }
 718
 719     /* Total:
 720      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 721      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 722      *
 723      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 724      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 725      * if nit=1
 726      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 727      */
 728 }
 729
 730 void set_lincs_matrix(struct gmx_lincsdata *li, real *invmass, real lambda)
 731 {
 732     int        i, a1, a2, n, k, sign, center;
 733     int        end, nk, kk;
 734     const real invsqrt2 = 0.7071067811865475244;
 735
 736     for (i = 0; (i < li->nc); i++)
 737     {
 738         a1          = li->bla[2*i];
 739         a2          = li->bla[2*i+1];
 740         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 741         li->blc1[i] = invsqrt2;
 742     }
 743
 744     /* Construct the coupling coefficient matrix blmf */
 745     li->ntriangle    = 0;
 746     li->ncc_triangle = 0;
 747     for (i = 0; (i < li->nc); i++)
 748     {
 749         a1 = li->bla[2*i];
 750         a2 = li->bla[2*i+1];
 751         for (n = li->blnr[i]; (n < li->blnr[i+1]); n++)
 752         {
 753             k = li->blbnb[n];
 754             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 755             {
 756                 sign = -1;
 757             }
 758             else
 759             {
 760                 sign = 1;
 761             }
 762             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 763             {
 764                 center = a1;
 765                 end    = a2;
 766             }
 767             else
 768             {
 769                 center = a2;
 770                 end    = a1;
 771             }
 772             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 773             li->blmf1[n] = sign*0.5;
 774             if (li->ncg_triangle > 0)
 775             {
 776                 /* Look for constraint triangles */
 777                 for (nk = li->blnr[k]; (nk < li->blnr[k+1]); nk++)
 778                 {
 779                     kk = li->blbnb[nk];
 780                     if (kk != i && kk != k &&
 781                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 782                     {
 783                         if (li->ntriangle == 0 ||
 784                             li->triangle[li->ntriangle-1] < i)
 785                         {
 786                             /* Add this constraint to the triangle list */
 787                             li->triangle[li->ntriangle] = i;
 788                             li->tri_bits[li->ntriangle] = 0;
 789                             li->ntriangle++;
 790                             if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 791                             {
 792                                 gmx_fatal(FARGS, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 793                                           li->blnr[i+1] - li->blnr[i],
 794                                           sizeof(li->tri_bits[0])*8-1);
 795                             }
 796                         }
 797                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 798                         li->ncc_triangle++;
 799                     }
 800                 }
 801             }
 802         }
 803     }
 804
 805     if (debug)
 806     {
 807         fprintf(debug, "Of the %d constraints %d participate in triangles\n",
 808                 li->nc, li->ntriangle);
 809         fprintf(debug, "There are %d couplings of which %d in triangles\n",
 810                 li->ncc, li->ncc_triangle);
 811     }
 812
 813     /* Set matlam,
 814      * so we know with which lambda value the masses have been set.
 815      */
 816     li->matlam = lambda;
 817 }
 818
 819 static int count_triangle_constraints(t_ilist *ilist, t_blocka *at2con)
 820 {
 821     int      ncon1, ncon_tot;
 822     int      c0, a00, a01, n1, c1, a10, a11, ac1, n2, c2, a20, a21;
 823     int      ncon_triangle;
 824     gmx_bool bTriangle;
 825     t_iatom *ia1, *ia2, *iap;
 826
 827     ncon1    = ilist[F_CONSTR].nr/3;
 828     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 829
 830     ia1 = ilist[F_CONSTR].iatoms;
 831     ia2 = ilist[F_CONSTRNC].iatoms;
 832
 833     ncon_triangle = 0;
 834     for (c0 = 0; c0 < ncon_tot; c0++)
 835     {
 836         bTriangle = FALSE;
 837         iap       = constr_iatomptr(ncon1, ia1, ia2, c0);
 838         a00       = iap[1];
 839         a01       = iap[2];
 840         for (n1 = at2con->index[a01]; n1 < at2con->index[a01+1]; n1++)
 841         {
 842             c1 = at2con->a[n1];
 843             if (c1 != c0)
 844             {
 845                 iap = constr_iatomptr(ncon1, ia1, ia2, c1);
 846                 a10 = iap[1];
 847                 a11 = iap[2];
 848                 if (a10 == a01)
 849                 {
 850                     ac1 = a11;
 851                 }
 852                 else
 853                 {
 854                     ac1 = a10;
 855                 }
 856                 for (n2 = at2con->index[ac1]; n2 < at2con->index[ac1+1]; n2++)
 857                 {
 858                     c2 = at2con->a[n2];
 859                     if (c2 != c0 && c2 != c1)
 860                     {
 861                         iap = constr_iatomptr(ncon1, ia1, ia2, c2);
 862                         a20 = iap[1];
 863                         a21 = iap[2];
 864                         if (a20 == a00 || a21 == a00)
 865                         {
 866                             bTriangle = TRUE;
 867                         }
 868                     }
 869                 }
 870             }
 871         }
 872         if (bTriangle)
 873         {
 874             ncon_triangle++;
 875         }
 876     }
 877
 878     return ncon_triangle;
 879 }
 880
 881 static gmx_bool more_than_two_sequential_constraints(const t_ilist  *ilist,
 882                                                      const t_blocka *at2con)
 883 {
 884     t_iatom  *ia1, *ia2, *iap;
 885     int       ncon1, ncon_tot, c;
 886     int       a1, a2;
 887     gmx_bool  bMoreThanTwoSequentialConstraints;
 888
 889     ncon1    = ilist[F_CONSTR].nr/3;
 890     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 891
 892     ia1 = ilist[F_CONSTR].iatoms;
 893     ia2 = ilist[F_CONSTRNC].iatoms;
 894
 895     bMoreThanTwoSequentialConstraints = FALSE;
 896     for (c = 0; c < ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
 897     {
 898         iap = constr_iatomptr(ncon1, ia1, ia2, c);
 899         a1  = iap[1];
 900         a2  = iap[2];
 901         /* Check if this constraint has constraints connected at both atoms */
 902         if (at2con->index[a1+1] - at2con->index[a1] > 1 &&
 903             at2con->index[a2+1] - at2con->index[a2] > 1)
 904         {
 905             bMoreThanTwoSequentialConstraints = TRUE;
 906         }
 907     }
 908
 909     return bMoreThanTwoSequentialConstraints;
 910 }
 911
 912 static int int_comp(const void *a, const void *b)
 913 {
 914     return (*(int *)a) - (*(int *)b);
 915 }
 916
 917 gmx_lincsdata_t init_lincs(FILE *fplog, gmx_mtop_t *mtop,
 918                            int nflexcon_global, t_blocka *at2con,
 919                            gmx_bool bPLINCS, int nIter, int nProjOrder)
 920 {
 921     struct gmx_lincsdata *li;
 922     int                   mb;
 923     gmx_moltype_t        *molt;
 924
 925     if (fplog)
 926     {
 927         fprintf(fplog, "\nInitializing%s LINear Constraint Solver\n",
 928                 bPLINCS ? " Parallel" : "");
 929     }
 930
 931     snew(li, 1);
 932
 933     li->ncg      =
 934         gmx_mtop_ftype_count(mtop, F_CONSTR) +
 935         gmx_mtop_ftype_count(mtop, F_CONSTRNC);
 936     li->ncg_flex = nflexcon_global;
 937
 938     li->nIter  = nIter;
 939     li->nOrder = nProjOrder;
 940
 941     li->ncg_triangle = 0;
 942     li->bCommIter    = FALSE;
 943     for (mb = 0; mb < mtop->nmolblock; mb++)
 944     {
 945         molt              = &mtop->moltype[mtop->molblock[mb].type];
 946         li->ncg_triangle +=
 947             mtop->molblock[mb].nmol*
 948             count_triangle_constraints(molt->ilist,
 949                                        &at2con[mtop->molblock[mb].type]);
 950         if (bPLINCS && li->bCommIter == FALSE)
 951         {
 952             /* Check if we need to communicate not only before LINCS,
 953              * but also before each iteration.
 954              * The check for only two sequential constraints is only
 955              * useful for the common case of H-bond only constraints.
 956              * With more effort we could also make it useful for small
 957              * molecules with nr. sequential constraints <= nOrder-1.
 958              */
 959             li->bCommIter = (li->nOrder < 1 || more_than_two_sequential_constraints(molt->ilist, &at2con[mtop->molblock[mb].type]));
 960         }
 961     }
 962     if (debug && bPLINCS)
 963     {
 964         fprintf(debug, "PLINCS communication before each iteration: %d\n",
 965                 li->bCommIter);
 966     }
 967
 968     /* LINCS can run on any number of threads.
 969      * Currently the number is fixed for the whole simulation,
 970      * but it could be set in set_lincs().
 971      */
 972     li->nth = gmx_omp_nthreads_get(emntLINCS);
 973     if (li->nth == 1)
 974     {
 975         snew(li->th, 1);
 976     }
 977     else
 978     {
 979         /* Allocate an extra elements for "thread-overlap" constraints */
 980         snew(li->th, li->nth+1);
 981     }
 982     if (debug)
 983     {
 984         fprintf(debug, "LINCS: using %d threads\n", li->nth);
 985     }
 986
 987     if (bPLINCS || li->ncg_triangle > 0)
 988     {
 989         please_cite(fplog, "Hess2008a");
 990     }
 991     else
 992     {
 993         please_cite(fplog, "Hess97a");
 994     }
 995
 996     if (fplog)
 997     {
 998         fprintf(fplog, "The number of constraints is %d\n", li->ncg);
 999         if (bPLINCS)
1000         {
1001             fprintf(fplog, "There are inter charge-group constraints,\n"
1002                     "will communicate selected coordinates each lincs iteration\n");
1003         }
1004         if (li->ncg_triangle > 0)
1005         {
1006             fprintf(fplog,
1007                     "%d constraints are involved in constraint triangles,\n"
1008                     "will apply an additional matrix expansion of order %d for couplings\n"
1009                     "between constraints inside triangles\n",
1010                     li->ncg_triangle, li->nOrder);
1011         }
1012     }
1013
1014     return li;
1015 }
1016
1017 /* Sets up the work division over the threads */
1018 static void lincs_thread_setup(struct gmx_lincsdata *li, int natoms)
1019 {
1020     lincs_thread_t *li_m;
1021     int             th;
1022     unsigned       *atf;
1023     int             a;
1024
1025     if (natoms > li->atf_nalloc)
1026     {
1027         li->atf_nalloc = over_alloc_large(natoms);
1028         srenew(li->atf, li->atf_nalloc);
1029     }
1030
1031     atf = li->atf;
1032     /* Clear the atom flags */
1033     for (a = 0; a < natoms; a++)
1034     {
1035         atf[a] = 0;
1036     }
1037
1038     for (th = 0; th < li->nth; th++)
1039     {
1040         lincs_thread_t *li_th;
1041         int             b;
1042
1043         li_th = &li->th[th];
1044
1045         /* The constraints are divided equally over the threads */
1046         li_th->b0 = (li->nc* th   )/li->nth;
1047         li_th->b1 = (li->nc*(th+1))/li->nth;
1048
1049         if (th < sizeof(*atf)*8)
1050         {
1051             /* For each atom set a flag for constraints from each */
1052             for (b = li_th->b0; b < li_th->b1; b++)
1053             {
1054                 atf[li->bla[b*2]  ] |= (1U<<th);
1055                 atf[li->bla[b*2+1]] |= (1U<<th);
1056             }
1057         }
1058     }
1059
1060 #pragma omp parallel for num_threads(li->nth) schedule(static)
1061     for (th = 0; th < li->nth; th++)
1062     {
1063         lincs_thread_t *li_th;
1064         unsigned        mask;
1065         int             b;
1066
1067         li_th = &li->th[th];
1068
1069         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1070         {
1071             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1072             srenew(li_th->ind, li_th->ind_nalloc);
1073             srenew(li_th->ind_r, li_th->ind_nalloc);
1074         }
1075
1076         if (th < sizeof(*atf)*8)
1077         {
1078             mask = (1U<<th) - 1U;
1079
1080             li_th->nind   = 0;
1081             li_th->nind_r = 0;
1082             for (b = li_th->b0; b < li_th->b1; b++)
1083             {
1084                 /* We let the constraint with the lowest thread index
1085                  * operate on atoms with constraints from multiple threads.
1086                  */
1087                 if (((atf[li->bla[b*2]]   & mask) == 0) &&
1088                     ((atf[li->bla[b*2+1]] & mask) == 0))
1089                 {
1090                     /* Add the constraint to the local atom update index */
1091                     li_th->ind[li_th->nind++] = b;
1092                 }
1093                 else
1094                 {
1095                     /* Add the constraint to the rest block */
1096                     li_th->ind_r[li_th->nind_r++] = b;
1097                 }
1098             }
1099         }
1100         else
1101         {
1102             /* We are out of bits, assign all constraints to rest */
1103             for (b = li_th->b0; b < li_th->b1; b++)
1104             {
1105                 li_th->ind_r[li_th->nind_r++] = b;
1106             }
1107         }
1108     }
1109
1110     /* We need to copy all constraints which have not be assigned
1111      * to a thread to a separate list which will be handled by one thread.
1112      */
1113     li_m = &li->th[li->nth];
1114
1115     li_m->nind = 0;
1116     for (th = 0; th < li->nth; th++)
1117     {
1118         lincs_thread_t *li_th;
1119         int             b;
1120
1121         li_th   = &li->th[th];
1122
1123         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1124         {
1125             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1126             srenew(li_m->ind, li_m->ind_nalloc);
1127         }
1128
1129         for (b = 0; b < li_th->nind_r; b++)
1130         {
1131             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1132         }
1133
1134         if (debug)
1135         {
1136             fprintf(debug, "LINCS thread %d: %d constraints\n",
1137                     th, li_th->nind);
1138         }
1139     }
1140
1141     if (debug)
1142     {
1143         fprintf(debug, "LINCS thread r: %d constraints\n",
1144                 li_m->nind);
1145     }
1146 }
1147
1148
1149 void set_lincs(t_idef *idef, t_mdatoms *md,
1150                gmx_bool bDynamics, t_commrec *cr,
1151                struct gmx_lincsdata *li)
1152 {
1153     int          start, natoms, nflexcon;
1154     t_blocka     at2con;
1155     t_iatom     *iatom;
1156     int          i, k, ncc_alloc, ni, con, nconnect, concon;
1157     int          type, a1, a2;
1158     real         lenA = 0, lenB;
1159     gmx_bool     bLocal;
1160
1161     li->nc  = 0;
1162     li->ncc = 0;
1163     /* Zero the thread index ranges.
1164      * Otherwise without local constraints we could return with old ranges.
1165      */
1166     for (i = 0; i < li->nth; i++)
1167     {
1168         li->th[i].b0   = 0;
1169         li->th[i].b1   = 0;
1170         li->th[i].nind = 0;
1171     }
1172     if (li->nth > 1)
1173     {
1174         li->th[li->nth].nind = 0;
1175     }
1176
1177     /* This is the local topology, so there are only F_CONSTR constraints */
1178     if (idef->il[F_CONSTR].nr == 0)
1179     {
1180         /* There are no constraints,
1181          * we do not need to fill any data structures.
1182          */
1183         return;
1184     }
1185
1186     if (debug)
1187     {
1188         fprintf(debug, "Building the LINCS connectivity\n");
1189     }
1190
1191     if (DOMAINDECOMP(cr))
1192     {
1193         if (cr->dd->constraints)
1194         {
1195             dd_get_constraint_range(cr->dd, &start, &natoms);
1196         }
1197         else
1198         {
1199             natoms = cr->dd->nat_home;
1200         }
1201         start = 0;
1202     }
1203     else
1204     {
1205         start  = 0;
1206         natoms = md->homenr;
1207     }
1208     at2con = make_at2con(start, natoms, idef->il, idef->iparams, bDynamics,
1209                          &nflexcon);
1210
1211
1212     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1213     {
1214         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1215         srenew(li->bllen0, li->nc_alloc);
1216         srenew(li->ddist, li->nc_alloc);
1217         srenew(li->bla, 2*li->nc_alloc);
1218         srenew(li->blc, li->nc_alloc);
1219         srenew(li->blc1, li->nc_alloc);
1220         srenew(li->blnr, li->nc_alloc+1);
1221         srenew(li->bllen, li->nc_alloc);
1222         srenew(li->tmpv, li->nc_alloc);
1223         srenew(li->tmp1, li->nc_alloc);
1224         srenew(li->tmp2, li->nc_alloc);
1225         srenew(li->tmp3, li->nc_alloc);
1226         srenew(li->tmp4, li->nc_alloc);
1227         srenew(li->mlambda, li->nc_alloc);
1228         if (li->ncg_triangle > 0)
1229         {
1230             /* This is allocating too much, but it is difficult to improve */
1231             srenew(li->triangle, li->nc_alloc);
1232             srenew(li->tri_bits, li->nc_alloc);
1233         }
1234     }
1235
1236     iatom = idef->il[F_CONSTR].iatoms;
1237
1238     ncc_alloc   = li->ncc_alloc;
1239     li->blnr[0] = 0;
1240
1241     ni = idef->il[F_CONSTR].nr/3;
1242
1243     con           = 0;
1244     nconnect      = 0;
1245     li->blnr[con] = nconnect;
1246     for (i = 0; i < ni; i++)
1247     {
1248         bLocal = TRUE;
1249         type   = iatom[3*i];
1250         a1     = iatom[3*i+1];
1251         a2     = iatom[3*i+2];
1252         lenA   = idef->iparams[type].constr.dA;
1253         lenB   = idef->iparams[type].constr.dB;
1254         /* Skip the flexible constraints when not doing dynamics */
1255         if (bDynamics || lenA != 0 || lenB != 0)
1256         {
1257             li->bllen0[con]  = lenA;
1258             li->ddist[con]   = lenB - lenA;
1259             /* Set the length to the topology A length */
1260             li->bllen[con]   = li->bllen0[con];
1261             li->bla[2*con]   = a1;
1262             li->bla[2*con+1] = a2;
1263             /* Construct the constraint connection matrix blbnb */
1264             for (k = at2con.index[a1-start]; k < at2con.index[a1-start+1]; k++)
1265             {
1266                 concon = at2con.a[k];
1267                 if (concon != i)
1268                 {
1269                     if (nconnect >= ncc_alloc)
1270                     {
1271                         ncc_alloc = over_alloc_small(nconnect+1);
1272                         srenew(li->blbnb, ncc_alloc);
1273                     }
1274                     li->blbnb[nconnect++] = concon;
1275                 }
1276             }
1277             for (k = at2con.index[a2-start]; k < at2con.index[a2-start+1]; k++)
1278             {
1279                 concon = at2con.a[k];
1280                 if (concon != i)
1281                 {
1282                     if (nconnect+1 > ncc_alloc)
1283                     {
1284                         ncc_alloc = over_alloc_small(nconnect+1);
1285                         srenew(li->blbnb, ncc_alloc);
1286                     }
1287                     li->blbnb[nconnect++] = concon;
1288                 }
1289             }
1290             li->blnr[con+1] = nconnect;
1291
1292             if (cr->dd == NULL)
1293             {
1294                 /* Order the blbnb matrix to optimize memory access */
1295                 qsort(&(li->blbnb[li->blnr[con]]), li->blnr[con+1]-li->blnr[con],
1296                       sizeof(li->blbnb[0]), int_comp);
1297             }
1298             /* Increase the constraint count */
1299             con++;
1300         }
1301     }
1302
1303     done_blocka(&at2con);
1304
1305     /* This is the real number of constraints,
1306      * without dynamics the flexible constraints are not present.
1307      */
1308     li->nc = con;
1309
1310     li->ncc = li->blnr[con];
1311     if (cr->dd == NULL)
1312     {
1313         /* Since the matrix is static, we can free some memory */
1314         ncc_alloc = li->ncc;
1315         srenew(li->blbnb, ncc_alloc);
1316     }
1317
1318     if (ncc_alloc > li->ncc_alloc)
1319     {
1320         li->ncc_alloc = ncc_alloc;
1321         srenew(li->blmf, li->ncc_alloc);
1322         srenew(li->blmf1, li->ncc_alloc);
1323         srenew(li->tmpncc, li->ncc_alloc);
1324     }
1325
1326     if (debug)
1327     {
1328         fprintf(debug, "Number of constraints is %d, couplings %d\n",
1329                 li->nc, li->ncc);
1330     }
1331
1332     if (li->nth == 1)
1333     {
1334         li->th[0].b0 = 0;
1335         li->th[0].b1 = li->nc;
1336     }
1337     else
1338     {
1339         lincs_thread_setup(li, md->nr);
1340     }
1341
1342     set_lincs_matrix(li, md->invmass, md->lambda);
1343 }
1344
1345 static void lincs_warning(FILE *fplog,
1346                           gmx_domdec_t *dd, rvec *x, rvec *xprime, t_pbc *pbc,
1347                           int ncons, int *bla, real *bllen, real wangle,
1348                           int maxwarn, int *warncount)
1349 {
1350     int  b, i, j;
1351     rvec v0, v1;
1352     real wfac, d0, d1, cosine;
1353     char buf[STRLEN];
1354
1355     wfac = cos(DEG2RAD*wangle);
1356
1357     sprintf(buf, "bonds that rotated more than %g degrees:\n"
1358             " atom 1 atom 2  angle  previous, current, constraint length\n",
1359             wangle);
1360     fprintf(stderr, "%s", buf);
1361     if (fplog)
1362     {
1363         fprintf(fplog, "%s", buf);
1364     }
1365
1366     for (b = 0; b < ncons; b++)
1367     {
1368         i = bla[2*b];
1369         j = bla[2*b+1];
1370         if (pbc)
1371         {
1372             pbc_dx_aiuc(pbc, x[i], x[j], v0);
1373             pbc_dx_aiuc(pbc, xprime[i], xprime[j], v1);
1374         }
1375         else
1376         {
1377             rvec_sub(x[i], x[j], v0);
1378             rvec_sub(xprime[i], xprime[j], v1);
1379         }
1380         d0     = norm(v0);
1381         d1     = norm(v1);
1382         cosine = iprod(v0, v1)/(d0*d1);
1383         if (cosine < wfac)
1384         {
1385             sprintf(buf, " %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1386                     ddglatnr(dd, i), ddglatnr(dd, j),
1387                     RAD2DEG*acos(cosine), d0, d1, bllen[b]);
1388             fprintf(stderr, "%s", buf);
1389             if (fplog)
1390             {
1391                 fprintf(fplog, "%s", buf);
1392             }
1393             if (!gmx_isfinite(d1))
1394             {
1395                 gmx_fatal(FARGS, "Bond length not finite.");
1396             }
1397
1398             (*warncount)++;
1399         }
1400     }
1401     if (*warncount > maxwarn)
1402     {
1403         too_many_constraint_warnings(econtLINCS, *warncount);
1404     }
1405 }
1406
1407 static void cconerr(gmx_domdec_t *dd,
1408                     int ncons, int *bla, real *bllen, rvec *x, t_pbc *pbc,
1409                     real *ncons_loc, real *ssd, real *max, int *imax)
1410 {
1411     real       len, d, ma, ssd2, r2;
1412     int       *nlocat, count, b, im;
1413     rvec       dx;
1414
1415     if (dd && dd->constraints)
1416     {
1417         nlocat = dd_constraints_nlocalatoms(dd);
1418     }
1419     else
1420     {
1421         nlocat = 0;
1422     }
1423
1424     ma    = 0;
1425     ssd2  = 0;
1426     im    = 0;
1427     count = 0;
1428     for (b = 0; b < ncons; b++)
1429     {
1430         if (pbc)
1431         {
1432             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
1433         }
1434         else
1435         {
1436             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
1437         }
1438         r2  = norm2(dx);
1439         len = r2*gmx_invsqrt(r2);
1440         d   = fabs(len/bllen[b]-1);
1441         if (d > ma && (nlocat == NULL || nlocat[b]))
1442         {
1443             ma = d;
1444             im = b;
1445         }
1446         if (nlocat == NULL)
1447         {
1448             ssd2 += d*d;
1449             count++;
1450         }
1451         else
1452         {
1453             ssd2  += nlocat[b]*d*d;
1454             count += nlocat[b];
1455         }
1456     }
1457
1458     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1459     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1460     *max       = ma;
1461     *imax      = im;
1462 }
1463
1464 gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner,
1465                          t_inputrec *ir,
1466                          gmx_int64_t step,
1467                          struct gmx_lincsdata *lincsd, t_mdatoms *md,
1468                          t_commrec *cr,
1469                          rvec *x, rvec *xprime, rvec *min_proj,
1470                          matrix box, t_pbc *pbc,
1471                          real lambda, real *dvdlambda,
1472                          real invdt, rvec *v,
1473                          gmx_bool bCalcVir, tensor vir_r_m_dr,
1474                          int econq,
1475                          t_nrnb *nrnb,
1476                          int maxwarn, int *warncount)
1477 {
1478     char      buf[STRLEN], buf2[22], buf3[STRLEN];
1479     int       i, warn, p_imax, error;
1480     real      ncons_loc, p_ssd, p_max = 0;
1481     rvec      dx;
1482     gmx_bool  bOK;
1483
1484     bOK = TRUE;
1485
1486     if (lincsd->nc == 0 && cr->dd == NULL)
1487     {
1488         if (bLog || bEner)
1489         {
1490             lincsd->rmsd_data[0] = 0;
1491             if (ir->eI == eiSD2 && v == NULL)
1492             {
1493                 i = 2;
1494             }
1495             else
1496             {
1497                 i = 1;
1498             }
1499             lincsd->rmsd_data[i] = 0;
1500         }
1501
1502         return bOK;
1503     }
1504
1505     if (econq == econqCoord)
1506     {
1507         if (ir->efep != efepNO)
1508         {
1509             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1510             {
1511                 set_lincs_matrix(lincsd, md->invmass, md->lambda);
1512             }
1513
1514             for (i = 0; i < lincsd->nc; i++)
1515             {
1516                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1517             }
1518         }
1519
1520         if (lincsd->ncg_flex)
1521         {
1522             /* Set the flexible constraint lengths to the old lengths */
1523             if (pbc != NULL)
1524             {
1525                 for (i = 0; i < lincsd->nc; i++)
1526                 {
1527                     if (lincsd->bllen[i] == 0)
1528                     {
1529                         pbc_dx_aiuc(pbc, x[lincsd->bla[2*i]], x[lincsd->bla[2*i+1]], dx);
1530                         lincsd->bllen[i] = norm(dx);
1531                     }
1532                 }
1533             }
1534             else
1535             {
1536                 for (i = 0; i < lincsd->nc; i++)
1537                 {
1538                     if (lincsd->bllen[i] == 0)
1539                     {
1540                         lincsd->bllen[i] =
1541                             sqrt(distance2(x[lincsd->bla[2*i]],
1542                                            x[lincsd->bla[2*i+1]]));
1543                     }
1544                 }
1545             }
1546         }
1547
1548         if (bLog && fplog)
1549         {
1550             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1551                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1552         }
1553
1554         /* This warn var can be updated by multiple threads
1555          * at the same time. But as we only need to detect
1556          * if a warning occured or not, this is not an issue.
1557          */
1558         warn = -1;
1559
1560         /* The OpenMP parallel region of constrain_lincs for coords */
1561 #pragma omp parallel num_threads(lincsd->nth)
1562         {
1563             int th = gmx_omp_get_thread_num();
1564
1565             clear_mat(lincsd->th[th].vir_r_m_dr);
1566
1567             do_lincs(x, xprime, box, pbc, lincsd, th,
1568                      md->invmass, cr,
1569                      bCalcVir || (ir->efep != efepNO),
1570                      ir->LincsWarnAngle, &warn,
1571                      invdt, v, bCalcVir,
1572                      th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1573         }
1574
1575         if (ir->efep != efepNO)
1576         {
1577             real dt_2, dvdl = 0;
1578
1579             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
1580             for (i = 0; (i < lincsd->nc); i++)
1581             {
1582                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
1583             }
1584             *dvdlambda += dvdl;
1585         }
1586
1587         if (bLog && fplog && lincsd->nc > 0)
1588         {
1589             fprintf(fplog, "   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1590             fprintf(fplog, "       Before LINCS          %.6f    %.6f %6d %6d\n",
1591                     sqrt(p_ssd/ncons_loc), p_max,
1592                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1593                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1594         }
1595         if (bLog || bEner)
1596         {
1597             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1598                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1599             /* Check if we are doing the second part of SD */
1600             if (ir->eI == eiSD2 && v == NULL)
1601             {
1602                 i = 2;
1603             }
1604             else
1605             {
1606                 i = 1;
1607             }
1608             lincsd->rmsd_data[0] = ncons_loc;
1609             lincsd->rmsd_data[i] = p_ssd;
1610         }
1611         else
1612         {
1613             lincsd->rmsd_data[0] = 0;
1614             lincsd->rmsd_data[1] = 0;
1615             lincsd->rmsd_data[2] = 0;
1616         }
1617         if (bLog && fplog && lincsd->nc > 0)
1618         {
1619             fprintf(fplog,
1620                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1621                     sqrt(p_ssd/ncons_loc), p_max,
1622                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1623                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1624         }
1625
1626         if (warn >= 0)
1627         {
1628             if (maxwarn >= 0)
1629             {
1630                 cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1631                         &ncons_loc, &p_ssd, &p_max, &p_imax);
1632                 if (MULTISIM(cr))
1633                 {
1634                     sprintf(buf3, " in simulation %d", cr->ms->sim);
1635                 }
1636                 else
1637                 {
1638                     buf3[0] = 0;
1639                 }
1640                 sprintf(buf, "\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1641                         "relative constraint deviation after LINCS:\n"
1642                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1643                         gmx_step_str(step, buf2), ir->init_t+step*ir->delta_t,
1644                         buf3,
1645                         sqrt(p_ssd/ncons_loc), p_max,
1646                         ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1647                         ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1648                 if (fplog)
1649                 {
1650                     fprintf(fplog, "%s", buf);
1651                 }
1652                 fprintf(stderr, "%s", buf);
1653                 lincs_warning(fplog, cr->dd, x, xprime, pbc,
1654                               lincsd->nc, lincsd->bla, lincsd->bllen,
1655                               ir->LincsWarnAngle, maxwarn, warncount);
1656             }
1657             bOK = (p_max < 0.5);
1658         }
1659
1660         if (lincsd->ncg_flex)
1661         {
1662             for (i = 0; (i < lincsd->nc); i++)
1663             {
1664                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1665                 {
1666                     lincsd->bllen[i] = 0;
1667                 }
1668             }
1669         }
1670     }
1671     else
1672     {
1673         /* The OpenMP parallel region of constrain_lincs for derivatives */
1674 #pragma omp parallel num_threads(lincsd->nth)
1675         {
1676             int th = gmx_omp_get_thread_num();
1677
1678             do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
1679                       md->invmass, econq, ir->efep != efepNO ? dvdlambda : NULL,
1680                       bCalcVir, th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1681         }
1682     }
1683
1684     if (bCalcVir && lincsd->nth > 1)
1685     {
1686         for (i = 1; i < lincsd->nth; i++)
1687         {
1688             m_add(vir_r_m_dr, lincsd->th[i].vir_r_m_dr, vir_r_m_dr);
1689         }
1690     }
1691
1692     /* count assuming nit=1 */
1693     inc_nrnb(nrnb, eNR_LINCS, lincsd->nc);
1694     inc_nrnb(nrnb, eNR_LINCSMAT, (2+lincsd->nOrder)*lincsd->ncc);
1695     if (lincsd->ntriangle > 0)
1696     {
1697         inc_nrnb(nrnb, eNR_LINCSMAT, lincsd->nOrder*lincsd->ncc_triangle);
1698     }
1699     if (v)
1700     {
1701         inc_nrnb(nrnb, eNR_CONSTR_V, lincsd->nc*2);
1702     }
1703     if (bCalcVir)
1704     {
1705         inc_nrnb(nrnb, eNR_CONSTR_VIR, lincsd->nc);
1706     }
1707
1708     return bOK;
1709 }