src/gromacs/mdlib/clincs.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #include "gmxpre.h"
  39
  40 #include <math.h>
  41 #include <stdlib.h>
  42
  43 #include "gromacs/fileio/gmxfio.h"
  44 #include "gromacs/legacyheaders/constr.h"
  45 #include "gromacs/legacyheaders/copyrite.h"
  46 #include "gromacs/legacyheaders/domdec.h"
  47 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
  48 #include "gromacs/legacyheaders/mdrun.h"
  49 #include "gromacs/legacyheaders/nrnb.h"
  50 #include "gromacs/legacyheaders/types/commrec.h"
  51 #include "gromacs/math/units.h"
  52 #include "gromacs/math/vec.h"
  53 #include "gromacs/pbcutil/pbc.h"
  54 #include "gromacs/topology/block.h"
  55 #include "gromacs/topology/mtop_util.h"
  56 #include "gromacs/utility/fatalerror.h"
  57 #include "gromacs/utility/gmxomp.h"
  58 #include "gromacs/utility/smalloc.h"
  59
  60 typedef struct {
  61     int    b0;         /* first constraint for this thread */
  62     int    b1;         /* b1-1 is the last constraint for this thread */
  63     int    nind;       /* number of indices */
  64     int   *ind;        /* constraint index for updating atom data */
  65     int    nind_r;     /* number of indices */
  66     int   *ind_r;      /* constraint index for updating atom data */
  67     int    ind_nalloc; /* allocation size of ind and ind_r */
  68     tensor vir_r_m_dr; /* temporary variable for virial calculation */
  69 } lincs_thread_t;
  70
  71 typedef struct gmx_lincsdata {
  72     int             ncg;          /* the global number of constraints */
  73     int             ncg_flex;     /* the global number of flexible constraints */
  74     int             ncg_triangle; /* the global number of constraints in triangles */
  75     int             nIter;        /* the number of iterations */
  76     int             nOrder;       /* the order of the matrix expansion */
  77     int             nc;           /* the number of constraints */
  78     int             nc_alloc;     /* the number we allocated memory for */
  79     int             ncc;          /* the number of constraint connections */
  80     int             ncc_alloc;    /* the number we allocated memory for */
  81     real            matlam;       /* the FE lambda value used for filling blc and blmf */
  82     real           *bllen0;       /* the reference distance in topology A */
  83     real           *ddist;        /* the reference distance in top B - the r.d. in top A */
  84     int            *bla;          /* the atom pairs involved in the constraints */
  85     real           *blc;          /* 1/sqrt(invmass1 + invmass2) */
  86     real           *blc1;         /* as blc, but with all masses 1 */
  87     int            *blnr;         /* index into blbnb and blmf */
  88     int            *blbnb;        /* list of constraint connections */
  89     int             ntriangle;    /* the local number of constraints in triangles */
  90     int            *triangle;     /* the list of triangle constraints */
  91     int            *tri_bits;     /* the bits tell if the matrix element should be used */
  92     int             ncc_triangle; /* the number of constraint connections in triangles */
  93     gmx_bool        bCommIter;    /* communicate before each LINCS interation */
  94     real           *blmf;         /* matrix of mass factors for constraint connections */
  95     real           *blmf1;        /* as blmf, but with all masses 1 */
  96     real           *bllen;        /* the reference bond length */
  97     int             nth;          /* The number of threads doing LINCS */
  98     lincs_thread_t *th;           /* LINCS thread division */
  99     unsigned       *atf;          /* atom flags for thread parallelization */
 100     int             atf_nalloc;   /* allocation size of atf */
 101     /* arrays for temporary storage in the LINCS algorithm */
 102     rvec           *tmpv;
 103     real           *tmpncc;
 104     real           *tmp1;
 105     real           *tmp2;
 106     real           *tmp3;
 107     real           *tmp4;
 108     real           *mlambda; /* the Lagrange multipliers * -1 */
 109     /* storage for the constraint RMS relative deviation output */
 110     real            rmsd_data[3];
 111 } t_gmx_lincsdata;
 112
 113 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 114 {
 115     return lincsd->rmsd_data;
 116 }
 117
 118 real lincs_rmsd(struct gmx_lincsdata *lincsd, gmx_bool bSD2)
 119 {
 120     if (lincsd->rmsd_data[0] > 0)
 121     {
 122         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 123     }
 124     else
 125     {
 126         return 0;
 127     }
 128 }
 129
 130 /* Do a set of nrec LINCS matrix multiplications.
 131  * This function will return with up to date thread-local
 132  * constraint data, without an OpenMP barrier.
 133  */
 134 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 135                                 int b0, int b1,
 136                                 const real *blcc,
 137                                 real *rhs1, real *rhs2, real *sol)
 138 {
 139     int        nrec, rec, b, j, n, nr0, nr1;
 140     real       mvb, *swap;
 141     int        ntriangle, tb, bits;
 142     const int *blnr     = lincsd->blnr, *blbnb = lincsd->blbnb;
 143     const int *triangle = lincsd->triangle, *tri_bits = lincsd->tri_bits;
 144
 145     ntriangle = lincsd->ntriangle;
 146     nrec      = lincsd->nOrder;
 147
 148     for (rec = 0; rec < nrec; rec++)
 149     {
 150 #pragma omp barrier
 151         for (b = b0; b < b1; b++)
 152         {
 153             mvb = 0;
 154             for (n = blnr[b]; n < blnr[b+1]; n++)
 155             {
 156                 j   = blbnb[n];
 157                 mvb = mvb + blcc[n]*rhs1[j];
 158             }
 159             rhs2[b] = mvb;
 160             sol[b]  = sol[b] + mvb;
 161         }
 162         swap = rhs1;
 163         rhs1 = rhs2;
 164         rhs2 = swap;
 165     } /* nrec*(ncons+2*nrtot) flops */
 166
 167     if (ntriangle > 0)
 168     {
 169         /* Perform an extra nrec recursions for only the constraints
 170          * involved in rigid triangles.
 171          * In this way their accuracy should come close to those of the other
 172          * constraints, since traingles of constraints can produce eigenvalues
 173          * around 0.7, while the effective eigenvalue for bond constraints
 174          * is around 0.4 (and 0.7*0.7=0.5).
 175          */
 176         /* We need to copy the temporary array, since only the elements
 177          * for constraints involved in triangles are updated and then
 178          * the pointers are swapped. This saving copying the whole arrary.
 179          * We need barrier as other threads might still be reading from rhs2.
 180          */
 181 #pragma omp barrier
 182         for (b = b0; b < b1; b++)
 183         {
 184             rhs2[b] = rhs1[b];
 185         }
 186 #pragma omp barrier
 187 #pragma omp master
 188         {
 189             for (rec = 0; rec < nrec; rec++)
 190             {
 191                 for (tb = 0; tb < ntriangle; tb++)
 192                 {
 193                     b    = triangle[tb];
 194                     bits = tri_bits[tb];
 195                     mvb  = 0;
 196                     nr0  = blnr[b];
 197                     nr1  = blnr[b+1];
 198                     for (n = nr0; n < nr1; n++)
 199                     {
 200                         if (bits & (1<<(n-nr0)))
 201                         {
 202                             j   = blbnb[n];
 203                             mvb = mvb + blcc[n]*rhs1[j];
 204                         }
 205                     }
 206                     rhs2[b] = mvb;
 207                     sol[b]  = sol[b] + mvb;
 208                 }
 209                 swap = rhs1;
 210                 rhs1 = rhs2;
 211                 rhs2 = swap;
 212             }
 213         } /* flops count is missing here */
 214
 215         /* We need a barrier here as the calling routine will continue
 216          * to operate on the thread-local constraints without barrier.
 217          */
 218 #pragma omp barrier
 219     }
 220 }
 221
 222 static void lincs_update_atoms_noind(int ncons, const int *bla,
 223                                      real prefac,
 224                                      const real *fac, rvec *r,
 225                                      const real *invmass,
 226                                      rvec *x)
 227 {
 228     int  b, i, j;
 229     real mvb, im1, im2, tmp0, tmp1, tmp2;
 230
 231     if (invmass != NULL)
 232     {
 233         for (b = 0; b < ncons; b++)
 234         {
 235             i        = bla[2*b];
 236             j        = bla[2*b+1];
 237             mvb      = prefac*fac[b];
 238             im1      = invmass[i];
 239             im2      = invmass[j];
 240             tmp0     = r[b][0]*mvb;
 241             tmp1     = r[b][1]*mvb;
 242             tmp2     = r[b][2]*mvb;
 243             x[i][0] -= tmp0*im1;
 244             x[i][1] -= tmp1*im1;
 245             x[i][2] -= tmp2*im1;
 246             x[j][0] += tmp0*im2;
 247             x[j][1] += tmp1*im2;
 248             x[j][2] += tmp2*im2;
 249         } /* 16 ncons flops */
 250     }
 251     else
 252     {
 253         for (b = 0; b < ncons; b++)
 254         {
 255             i        = bla[2*b];
 256             j        = bla[2*b+1];
 257             mvb      = prefac*fac[b];
 258             tmp0     = r[b][0]*mvb;
 259             tmp1     = r[b][1]*mvb;
 260             tmp2     = r[b][2]*mvb;
 261             x[i][0] -= tmp0;
 262             x[i][1] -= tmp1;
 263             x[i][2] -= tmp2;
 264             x[j][0] += tmp0;
 265             x[j][1] += tmp1;
 266             x[j][2] += tmp2;
 267         }
 268     }
 269 }
 270
 271 static void lincs_update_atoms_ind(int ncons, const int *ind, const int *bla,
 272                                    real prefac,
 273                                    const real *fac, rvec *r,
 274                                    const real *invmass,
 275                                    rvec *x)
 276 {
 277     int  bi, b, i, j;
 278     real mvb, im1, im2, tmp0, tmp1, tmp2;
 279
 280     if (invmass != NULL)
 281     {
 282         for (bi = 0; bi < ncons; bi++)
 283         {
 284             b        = ind[bi];
 285             i        = bla[2*b];
 286             j        = bla[2*b+1];
 287             mvb      = prefac*fac[b];
 288             im1      = invmass[i];
 289             im2      = invmass[j];
 290             tmp0     = r[b][0]*mvb;
 291             tmp1     = r[b][1]*mvb;
 292             tmp2     = r[b][2]*mvb;
 293             x[i][0] -= tmp0*im1;
 294             x[i][1] -= tmp1*im1;
 295             x[i][2] -= tmp2*im1;
 296             x[j][0] += tmp0*im2;
 297             x[j][1] += tmp1*im2;
 298             x[j][2] += tmp2*im2;
 299         } /* 16 ncons flops */
 300     }
 301     else
 302     {
 303         for (bi = 0; bi < ncons; bi++)
 304         {
 305             b        = ind[bi];
 306             i        = bla[2*b];
 307             j        = bla[2*b+1];
 308             mvb      = prefac*fac[b];
 309             tmp0     = r[b][0]*mvb;
 310             tmp1     = r[b][1]*mvb;
 311             tmp2     = r[b][2]*mvb;
 312             x[i][0] -= tmp0;
 313             x[i][1] -= tmp1;
 314             x[i][2] -= tmp2;
 315             x[j][0] += tmp0;
 316             x[j][1] += tmp1;
 317             x[j][2] += tmp2;
 318         } /* 16 ncons flops */
 319     }
 320 }
 321
 322 static void lincs_update_atoms(struct gmx_lincsdata *li, int th,
 323                                real prefac,
 324                                const real *fac, rvec *r,
 325                                const real *invmass,
 326                                rvec *x)
 327 {
 328     if (li->nth == 1)
 329     {
 330         /* Single thread, we simply update for all constraints */
 331         lincs_update_atoms_noind(li->nc, li->bla, prefac, fac, r, invmass, x);
 332     }
 333     else
 334     {
 335         /* Update the atom vector components for our thread local
 336          * constraints that only access our local atom range.
 337          * This can be done without a barrier.
 338          */
 339         lincs_update_atoms_ind(li->th[th].nind, li->th[th].ind,
 340                                li->bla, prefac, fac, r, invmass, x);
 341
 342         if (li->th[li->nth].nind > 0)
 343         {
 344             /* Update the constraints that operate on atoms
 345              * in multiple thread atom blocks on the master thread.
 346              */
 347 #pragma omp barrier
 348 #pragma omp master
 349             {
 350                 lincs_update_atoms_ind(li->th[li->nth].nind,
 351                                        li->th[li->nth].ind,
 352                                        li->bla, prefac, fac, r, invmass, x);
 353             }
 354         }
 355     }
 356 }
 357
 358 /* LINCS projection, works on derivatives of the coordinates */
 359 static void do_lincsp(rvec *x, rvec *f, rvec *fp, t_pbc *pbc,
 360                       struct gmx_lincsdata *lincsd, int th,
 361                       real *invmass,
 362                       int econq, real *dvdlambda,
 363                       gmx_bool bCalcVir, tensor rmdf)
 364 {
 365     int      b0, b1, b, i, j, k, n;
 366     real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, wfac, lam;
 367     rvec     dx;
 368     int     *bla, *blnr, *blbnb;
 369     rvec    *r;
 370     real    *blc, *blmf, *blcc, *rhs1, *rhs2, *sol;
 371
 372     b0 = lincsd->th[th].b0;
 373     b1 = lincsd->th[th].b1;
 374
 375     bla    = lincsd->bla;
 376     r      = lincsd->tmpv;
 377     blnr   = lincsd->blnr;
 378     blbnb  = lincsd->blbnb;
 379     if (econq != econqForce)
 380     {
 381         /* Use mass-weighted parameters */
 382         blc  = lincsd->blc;
 383         blmf = lincsd->blmf;
 384     }
 385     else
 386     {
 387         /* Use non mass-weighted parameters */
 388         blc  = lincsd->blc1;
 389         blmf = lincsd->blmf1;
 390     }
 391     blcc   = lincsd->tmpncc;
 392     rhs1   = lincsd->tmp1;
 393     rhs2   = lincsd->tmp2;
 394     sol    = lincsd->tmp3;
 395
 396     /* Compute normalized i-j vectors */
 397     if (pbc)
 398     {
 399         for (b = b0; b < b1; b++)
 400         {
 401             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 402             unitv(dx, r[b]);
 403         }
 404     }
 405     else
 406     {
 407         for (b = b0; b < b1; b++)
 408         {
 409             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
 410             unitv(dx, r[b]);
 411         } /* 16 ncons flops */
 412     }
 413
 414 #pragma omp barrier
 415     for (b = b0; b < b1; b++)
 416     {
 417         tmp0 = r[b][0];
 418         tmp1 = r[b][1];
 419         tmp2 = r[b][2];
 420         i    = bla[2*b];
 421         j    = bla[2*b+1];
 422         for (n = blnr[b]; n < blnr[b+1]; n++)
 423         {
 424             k       = blbnb[n];
 425             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 426         } /* 6 nr flops */
 427         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 428                       tmp1*(f[i][1] - f[j][1]) +
 429                       tmp2*(f[i][2] - f[j][2]));
 430         rhs1[b] = mvb;
 431         sol[b]  = mvb;
 432         /* 7 flops */
 433     }
 434     /* Together: 23*ncons + 6*nrtot flops */
 435
 436     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 437     /* nrec*(ncons+2*nrtot) flops */
 438
 439     if (econq == econqDeriv_FlexCon)
 440     {
 441         /* We only want to constraint the flexible constraints,
 442          * so we mask out the normal ones by setting sol to 0.
 443          */
 444         for (b = b0; b < b1; b++)
 445         {
 446             if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 447             {
 448                 sol[b] = 0;
 449             }
 450         }
 451     }
 452
 453     /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
 454     for (b = b0; b < b1; b++)
 455     {
 456         sol[b] *= blc[b];
 457     }
 458
 459     /* When constraining forces, we should not use mass weighting,
 460      * so we pass invmass=NULL, which results in the use of 1 for all atoms.
 461      */
 462     lincs_update_atoms(lincsd, th, 1.0, sol, r,
 463                        (econq != econqForce) ? invmass : NULL, fp);
 464
 465     if (dvdlambda != NULL)
 466     {
 467 #pragma omp barrier
 468         for (b = b0; b < b1; b++)
 469         {
 470             *dvdlambda -= sol[b]*lincsd->ddist[b];
 471         }
 472         /* 10 ncons flops */
 473     }
 474
 475     if (bCalcVir)
 476     {
 477         /* Constraint virial,
 478          * determines sum r_bond x delta f,
 479          * where delta f is the constraint correction
 480          * of the quantity that is being constrained.
 481          */
 482         for (b = b0; b < b1; b++)
 483         {
 484             mvb = lincsd->bllen[b]*sol[b];
 485             for (i = 0; i < DIM; i++)
 486             {
 487                 tmp1 = mvb*r[b][i];
 488                 for (j = 0; j < DIM; j++)
 489                 {
 490                     rmdf[i][j] += tmp1*r[b][j];
 491                 }
 492             }
 493         } /* 23 ncons flops */
 494     }
 495 }
 496
 497 static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
 498                      struct gmx_lincsdata *lincsd, int th,
 499                      real *invmass,
 500                      t_commrec *cr,
 501                      gmx_bool bCalcLambda,
 502                      real wangle, int *warn,
 503                      real invdt, rvec *v,
 504                      gmx_bool bCalcVir, tensor vir_r_m_dr)
 505 {
 506     int      b0, b1, b, i, j, k, n, iter;
 507     real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, len2, dlen2, wfac;
 508     rvec     dx;
 509     int     *bla, *blnr, *blbnb;
 510     rvec    *r;
 511     real    *blc, *blmf, *bllen, *blcc, *rhs1, *rhs2, *sol, *blc_sol, *mlambda;
 512     int     *nlocat;
 513
 514     b0 = lincsd->th[th].b0;
 515     b1 = lincsd->th[th].b1;
 516
 517     bla     = lincsd->bla;
 518     r       = lincsd->tmpv;
 519     blnr    = lincsd->blnr;
 520     blbnb   = lincsd->blbnb;
 521     blc     = lincsd->blc;
 522     blmf    = lincsd->blmf;
 523     bllen   = lincsd->bllen;
 524     blcc    = lincsd->tmpncc;
 525     rhs1    = lincsd->tmp1;
 526     rhs2    = lincsd->tmp2;
 527     sol     = lincsd->tmp3;
 528     blc_sol = lincsd->tmp4;
 529     mlambda = lincsd->mlambda;
 530
 531     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 532     {
 533         nlocat = dd_constraints_nlocalatoms(cr->dd);
 534     }
 535     else
 536     {
 537         nlocat = NULL;
 538     }
 539
 540     if (pbc)
 541     {
 542         /* Compute normalized i-j vectors */
 543         for (b = b0; b < b1; b++)
 544         {
 545             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 546             unitv(dx, r[b]);
 547         }
 548 #pragma omp barrier
 549         for (b = b0; b < b1; b++)
 550         {
 551             for (n = blnr[b]; n < blnr[b+1]; n++)
 552             {
 553                 blcc[n] = blmf[n]*iprod(r[b], r[blbnb[n]]);
 554             }
 555             pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 556             mvb     = blc[b]*(iprod(r[b], dx) - bllen[b]);
 557             rhs1[b] = mvb;
 558             sol[b]  = mvb;
 559         }
 560     }
 561     else
 562     {
 563         /* Compute normalized i-j vectors */
 564         for (b = b0; b < b1; b++)
 565         {
 566             i       = bla[2*b];
 567             j       = bla[2*b+1];
 568             tmp0    = x[i][0] - x[j][0];
 569             tmp1    = x[i][1] - x[j][1];
 570             tmp2    = x[i][2] - x[j][2];
 571             rlen    = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 572             r[b][0] = rlen*tmp0;
 573             r[b][1] = rlen*tmp1;
 574             r[b][2] = rlen*tmp2;
 575         } /* 16 ncons flops */
 576
 577 #pragma omp barrier
 578         for (b = b0; b < b1; b++)
 579         {
 580             tmp0 = r[b][0];
 581             tmp1 = r[b][1];
 582             tmp2 = r[b][2];
 583             len  = bllen[b];
 584             i    = bla[2*b];
 585             j    = bla[2*b+1];
 586             for (n = blnr[b]; n < blnr[b+1]; n++)
 587             {
 588                 k       = blbnb[n];
 589                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 590             } /* 6 nr flops */
 591             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 592                           tmp1*(xp[i][1] - xp[j][1]) +
 593                           tmp2*(xp[i][2] - xp[j][2]) - len);
 594             rhs1[b] = mvb;
 595             sol[b]  = mvb;
 596             /* 10 flops */
 597         }
 598         /* Together: 26*ncons + 6*nrtot flops */
 599     }
 600
 601     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 602     /* nrec*(ncons+2*nrtot) flops */
 603
 604     for (b = b0; b < b1; b++)
 605     {
 606         mlambda[b] = blc[b]*sol[b];
 607     }
 608
 609     /* Update the coordinates */
 610     lincs_update_atoms(lincsd, th, 1.0, mlambda, r, invmass, xp);
 611
 612     /*
 613      ********  Correction for centripetal effects  ********
 614      */
 615
 616     wfac = cos(DEG2RAD*wangle);
 617     wfac = wfac*wfac;
 618
 619     for (iter = 0; iter < lincsd->nIter; iter++)
 620     {
 621         if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints))
 622         {
 623 #pragma omp barrier
 624 #pragma omp master
 625             {
 626                 /* Communicate the corrected non-local coordinates */
 627                 if (DOMAINDECOMP(cr))
 628                 {
 629                     dd_move_x_constraints(cr->dd, box, xp, NULL, FALSE);
 630                 }
 631             }
 632         }
 633
 634 #pragma omp barrier
 635         for (b = b0; b < b1; b++)
 636         {
 637             len = bllen[b];
 638             if (pbc)
 639             {
 640                 pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 641             }
 642             else
 643             {
 644                 rvec_sub(xp[bla[2*b]], xp[bla[2*b+1]], dx);
 645             }
 646             len2  = len*len;
 647             dlen2 = 2*len2 - norm2(dx);
 648             if (dlen2 < wfac*len2 && (nlocat == NULL || nlocat[b]))
 649             {
 650                 /* not race free - see detailed comment in caller */
 651                 *warn = b;
 652             }
 653             if (dlen2 > 0)
 654             {
 655                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 656             }
 657             else
 658             {
 659                 mvb = blc[b]*len;
 660             }
 661             rhs1[b] = mvb;
 662             sol[b]  = mvb;
 663         } /* 20*ncons flops */
 664
 665         lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 666         /* nrec*(ncons+2*nrtot) flops */
 667
 668         for (b = b0; b < b1; b++)
 669         {
 670             mvb         = blc[b]*sol[b];
 671             blc_sol[b]  = mvb;
 672             mlambda[b] += mvb;
 673         }
 674
 675         /* Update the coordinates */
 676         lincs_update_atoms(lincsd, th, 1.0, blc_sol, r, invmass, xp);
 677     }
 678     /* nit*ncons*(37+9*nrec) flops */
 679
 680     if (v != NULL)
 681     {
 682         /* Update the velocities */
 683         lincs_update_atoms(lincsd, th, invdt, mlambda, r, invmass, v);
 684         /* 16 ncons flops */
 685     }
 686
 687     if (nlocat != NULL && bCalcLambda)
 688     {
 689         /* In lincs_update_atoms thread might cross-read mlambda */
 690 #pragma omp barrier
 691
 692         /* Only account for local atoms */
 693         for (b = b0; b < b1; b++)
 694         {
 695             mlambda[b] *= 0.5*nlocat[b];
 696         }
 697     }
 698
 699     if (bCalcVir)
 700     {
 701         /* Constraint virial */
 702         for (b = b0; b < b1; b++)
 703         {
 704             tmp0 = -bllen[b]*mlambda[b];
 705             for (i = 0; i < DIM; i++)
 706             {
 707                 tmp1 = tmp0*r[b][i];
 708                 for (j = 0; j < DIM; j++)
 709                 {
 710                     vir_r_m_dr[i][j] -= tmp1*r[b][j];
 711                 }
 712             }
 713         } /* 22 ncons flops */
 714     }
 715
 716     /* Total:
 717      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 718      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 719      *
 720      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 721      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 722      * if nit=1
 723      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 724      */
 725 }
 726
 727 void set_lincs_matrix(struct gmx_lincsdata *li, real *invmass, real lambda)
 728 {
 729     int        i, a1, a2, n, k, sign, center;
 730     int        end, nk, kk;
 731     const real invsqrt2 = 0.7071067811865475244;
 732
 733     for (i = 0; (i < li->nc); i++)
 734     {
 735         a1          = li->bla[2*i];
 736         a2          = li->bla[2*i+1];
 737         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 738         li->blc1[i] = invsqrt2;
 739     }
 740
 741     /* Construct the coupling coefficient matrix blmf */
 742     li->ntriangle    = 0;
 743     li->ncc_triangle = 0;
 744     for (i = 0; (i < li->nc); i++)
 745     {
 746         a1 = li->bla[2*i];
 747         a2 = li->bla[2*i+1];
 748         for (n = li->blnr[i]; (n < li->blnr[i+1]); n++)
 749         {
 750             k = li->blbnb[n];
 751             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 752             {
 753                 sign = -1;
 754             }
 755             else
 756             {
 757                 sign = 1;
 758             }
 759             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 760             {
 761                 center = a1;
 762                 end    = a2;
 763             }
 764             else
 765             {
 766                 center = a2;
 767                 end    = a1;
 768             }
 769             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 770             li->blmf1[n] = sign*0.5;
 771             if (li->ncg_triangle > 0)
 772             {
 773                 /* Look for constraint triangles */
 774                 for (nk = li->blnr[k]; (nk < li->blnr[k+1]); nk++)
 775                 {
 776                     kk = li->blbnb[nk];
 777                     if (kk != i && kk != k &&
 778                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 779                     {
 780                         if (li->ntriangle == 0 ||
 781                             li->triangle[li->ntriangle-1] < i)
 782                         {
 783                             /* Add this constraint to the triangle list */
 784                             li->triangle[li->ntriangle] = i;
 785                             li->tri_bits[li->ntriangle] = 0;
 786                             li->ntriangle++;
 787                             if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 788                             {
 789                                 gmx_fatal(FARGS, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 790                                           li->blnr[i+1] - li->blnr[i],
 791                                           sizeof(li->tri_bits[0])*8-1);
 792                             }
 793                         }
 794                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 795                         li->ncc_triangle++;
 796                     }
 797                 }
 798             }
 799         }
 800     }
 801
 802     if (debug)
 803     {
 804         fprintf(debug, "Of the %d constraints %d participate in triangles\n",
 805                 li->nc, li->ntriangle);
 806         fprintf(debug, "There are %d couplings of which %d in triangles\n",
 807                 li->ncc, li->ncc_triangle);
 808     }
 809
 810     /* Set matlam,
 811      * so we know with which lambda value the masses have been set.
 812      */
 813     li->matlam = lambda;
 814 }
 815
 816 static int count_triangle_constraints(t_ilist *ilist, t_blocka *at2con)
 817 {
 818     int      ncon1, ncon_tot;
 819     int      c0, a00, a01, n1, c1, a10, a11, ac1, n2, c2, a20, a21;
 820     int      ncon_triangle;
 821     gmx_bool bTriangle;
 822     t_iatom *ia1, *ia2, *iap;
 823
 824     ncon1    = ilist[F_CONSTR].nr/3;
 825     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 826
 827     ia1 = ilist[F_CONSTR].iatoms;
 828     ia2 = ilist[F_CONSTRNC].iatoms;
 829
 830     ncon_triangle = 0;
 831     for (c0 = 0; c0 < ncon_tot; c0++)
 832     {
 833         bTriangle = FALSE;
 834         iap       = constr_iatomptr(ncon1, ia1, ia2, c0);
 835         a00       = iap[1];
 836         a01       = iap[2];
 837         for (n1 = at2con->index[a01]; n1 < at2con->index[a01+1]; n1++)
 838         {
 839             c1 = at2con->a[n1];
 840             if (c1 != c0)
 841             {
 842                 iap = constr_iatomptr(ncon1, ia1, ia2, c1);
 843                 a10 = iap[1];
 844                 a11 = iap[2];
 845                 if (a10 == a01)
 846                 {
 847                     ac1 = a11;
 848                 }
 849                 else
 850                 {
 851                     ac1 = a10;
 852                 }
 853                 for (n2 = at2con->index[ac1]; n2 < at2con->index[ac1+1]; n2++)
 854                 {
 855                     c2 = at2con->a[n2];
 856                     if (c2 != c0 && c2 != c1)
 857                     {
 858                         iap = constr_iatomptr(ncon1, ia1, ia2, c2);
 859                         a20 = iap[1];
 860                         a21 = iap[2];
 861                         if (a20 == a00 || a21 == a00)
 862                         {
 863                             bTriangle = TRUE;
 864                         }
 865                     }
 866                 }
 867             }
 868         }
 869         if (bTriangle)
 870         {
 871             ncon_triangle++;
 872         }
 873     }
 874
 875     return ncon_triangle;
 876 }
 877
 878 static gmx_bool more_than_two_sequential_constraints(const t_ilist  *ilist,
 879                                                      const t_blocka *at2con)
 880 {
 881     t_iatom  *ia1, *ia2, *iap;
 882     int       ncon1, ncon_tot, c;
 883     int       a1, a2;
 884     gmx_bool  bMoreThanTwoSequentialConstraints;
 885
 886     ncon1    = ilist[F_CONSTR].nr/3;
 887     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 888
 889     ia1 = ilist[F_CONSTR].iatoms;
 890     ia2 = ilist[F_CONSTRNC].iatoms;
 891
 892     bMoreThanTwoSequentialConstraints = FALSE;
 893     for (c = 0; c < ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
 894     {
 895         iap = constr_iatomptr(ncon1, ia1, ia2, c);
 896         a1  = iap[1];
 897         a2  = iap[2];
 898         /* Check if this constraint has constraints connected at both atoms */
 899         if (at2con->index[a1+1] - at2con->index[a1] > 1 &&
 900             at2con->index[a2+1] - at2con->index[a2] > 1)
 901         {
 902             bMoreThanTwoSequentialConstraints = TRUE;
 903         }
 904     }
 905
 906     return bMoreThanTwoSequentialConstraints;
 907 }
 908
 909 static int int_comp(const void *a, const void *b)
 910 {
 911     return (*(int *)a) - (*(int *)b);
 912 }
 913
 914 gmx_lincsdata_t init_lincs(FILE *fplog, gmx_mtop_t *mtop,
 915                            int nflexcon_global, t_blocka *at2con,
 916                            gmx_bool bPLINCS, int nIter, int nProjOrder)
 917 {
 918     struct gmx_lincsdata *li;
 919     int                   mb;
 920     gmx_moltype_t        *molt;
 921
 922     if (fplog)
 923     {
 924         fprintf(fplog, "\nInitializing%s LINear Constraint Solver\n",
 925                 bPLINCS ? " Parallel" : "");
 926     }
 927
 928     snew(li, 1);
 929
 930     li->ncg      =
 931         gmx_mtop_ftype_count(mtop, F_CONSTR) +
 932         gmx_mtop_ftype_count(mtop, F_CONSTRNC);
 933     li->ncg_flex = nflexcon_global;
 934
 935     li->nIter  = nIter;
 936     li->nOrder = nProjOrder;
 937
 938     li->ncg_triangle = 0;
 939     li->bCommIter    = FALSE;
 940     for (mb = 0; mb < mtop->nmolblock; mb++)
 941     {
 942         molt              = &mtop->moltype[mtop->molblock[mb].type];
 943         li->ncg_triangle +=
 944             mtop->molblock[mb].nmol*
 945             count_triangle_constraints(molt->ilist,
 946                                        &at2con[mtop->molblock[mb].type]);
 947         if (bPLINCS && li->bCommIter == FALSE)
 948         {
 949             /* Check if we need to communicate not only before LINCS,
 950              * but also before each iteration.
 951              * The check for only two sequential constraints is only
 952              * useful for the common case of H-bond only constraints.
 953              * With more effort we could also make it useful for small
 954              * molecules with nr. sequential constraints <= nOrder-1.
 955              */
 956             li->bCommIter = (li->nOrder < 1 || more_than_two_sequential_constraints(molt->ilist, &at2con[mtop->molblock[mb].type]));
 957         }
 958     }
 959     if (debug && bPLINCS)
 960     {
 961         fprintf(debug, "PLINCS communication before each iteration: %d\n",
 962                 li->bCommIter);
 963     }
 964
 965     /* LINCS can run on any number of threads.
 966      * Currently the number is fixed for the whole simulation,
 967      * but it could be set in set_lincs().
 968      */
 969     li->nth = gmx_omp_nthreads_get(emntLINCS);
 970     if (li->nth == 1)
 971     {
 972         snew(li->th, 1);
 973     }
 974     else
 975     {
 976         /* Allocate an extra elements for "thread-overlap" constraints */
 977         snew(li->th, li->nth+1);
 978     }
 979     if (debug)
 980     {
 981         fprintf(debug, "LINCS: using %d threads\n", li->nth);
 982     }
 983
 984     if (bPLINCS || li->ncg_triangle > 0)
 985     {
 986         please_cite(fplog, "Hess2008a");
 987     }
 988     else
 989     {
 990         please_cite(fplog, "Hess97a");
 991     }
 992
 993     if (fplog)
 994     {
 995         fprintf(fplog, "The number of constraints is %d\n", li->ncg);
 996         if (bPLINCS)
 997         {
 998             fprintf(fplog, "There are inter charge-group constraints,\n"
 999                     "will communicate selected coordinates each lincs iteration\n");
1000         }
1001         if (li->ncg_triangle > 0)
1002         {
1003             fprintf(fplog,
1004                     "%d constraints are involved in constraint triangles,\n"
1005                     "will apply an additional matrix expansion of order %d for couplings\n"
1006                     "between constraints inside triangles\n",
1007                     li->ncg_triangle, li->nOrder);
1008         }
1009     }
1010
1011     return li;
1012 }
1013
1014 /* Sets up the work division over the threads */
1015 static void lincs_thread_setup(struct gmx_lincsdata *li, int natoms)
1016 {
1017     lincs_thread_t *li_m;
1018     int             th;
1019     unsigned       *atf;
1020     int             a;
1021
1022     if (natoms > li->atf_nalloc)
1023     {
1024         li->atf_nalloc = over_alloc_large(natoms);
1025         srenew(li->atf, li->atf_nalloc);
1026     }
1027
1028     atf = li->atf;
1029     /* Clear the atom flags */
1030     for (a = 0; a < natoms; a++)
1031     {
1032         atf[a] = 0;
1033     }
1034
1035     for (th = 0; th < li->nth; th++)
1036     {
1037         lincs_thread_t *li_th;
1038         int             b;
1039
1040         li_th = &li->th[th];
1041
1042         /* The constraints are divided equally over the threads */
1043         li_th->b0 = (li->nc* th   )/li->nth;
1044         li_th->b1 = (li->nc*(th+1))/li->nth;
1045
1046         if (th < sizeof(*atf)*8)
1047         {
1048             /* For each atom set a flag for constraints from each */
1049             for (b = li_th->b0; b < li_th->b1; b++)
1050             {
1051                 atf[li->bla[b*2]  ] |= (1U<<th);
1052                 atf[li->bla[b*2+1]] |= (1U<<th);
1053             }
1054         }
1055     }
1056
1057 #pragma omp parallel for num_threads(li->nth) schedule(static)
1058     for (th = 0; th < li->nth; th++)
1059     {
1060         lincs_thread_t *li_th;
1061         unsigned        mask;
1062         int             b;
1063
1064         li_th = &li->th[th];
1065
1066         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1067         {
1068             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1069             srenew(li_th->ind, li_th->ind_nalloc);
1070             srenew(li_th->ind_r, li_th->ind_nalloc);
1071         }
1072
1073         if (th < sizeof(*atf)*8)
1074         {
1075             mask = (1U<<th) - 1U;
1076
1077             li_th->nind   = 0;
1078             li_th->nind_r = 0;
1079             for (b = li_th->b0; b < li_th->b1; b++)
1080             {
1081                 /* We let the constraint with the lowest thread index
1082                  * operate on atoms with constraints from multiple threads.
1083                  */
1084                 if (((atf[li->bla[b*2]]   & mask) == 0) &&
1085                     ((atf[li->bla[b*2+1]] & mask) == 0))
1086                 {
1087                     /* Add the constraint to the local atom update index */
1088                     li_th->ind[li_th->nind++] = b;
1089                 }
1090                 else
1091                 {
1092                     /* Add the constraint to the rest block */
1093                     li_th->ind_r[li_th->nind_r++] = b;
1094                 }
1095             }
1096         }
1097         else
1098         {
1099             /* We are out of bits, assign all constraints to rest */
1100             for (b = li_th->b0; b < li_th->b1; b++)
1101             {
1102                 li_th->ind_r[li_th->nind_r++] = b;
1103             }
1104         }
1105     }
1106
1107     /* We need to copy all constraints which have not be assigned
1108      * to a thread to a separate list which will be handled by one thread.
1109      */
1110     li_m = &li->th[li->nth];
1111
1112     li_m->nind = 0;
1113     for (th = 0; th < li->nth; th++)
1114     {
1115         lincs_thread_t *li_th;
1116         int             b;
1117
1118         li_th   = &li->th[th];
1119
1120         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1121         {
1122             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1123             srenew(li_m->ind, li_m->ind_nalloc);
1124         }
1125
1126         for (b = 0; b < li_th->nind_r; b++)
1127         {
1128             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1129         }
1130
1131         if (debug)
1132         {
1133             fprintf(debug, "LINCS thread %d: %d constraints\n",
1134                     th, li_th->nind);
1135         }
1136     }
1137
1138     if (debug)
1139     {
1140         fprintf(debug, "LINCS thread r: %d constraints\n",
1141                 li_m->nind);
1142     }
1143 }
1144
1145
1146 void set_lincs(t_idef *idef, t_mdatoms *md,
1147                gmx_bool bDynamics, t_commrec *cr,
1148                struct gmx_lincsdata *li)
1149 {
1150     int          start, natoms, nflexcon;
1151     t_blocka     at2con;
1152     t_iatom     *iatom;
1153     int          i, k, ncc_alloc, ni, con, nconnect, concon;
1154     int          type, a1, a2;
1155     real         lenA = 0, lenB;
1156     gmx_bool     bLocal;
1157
1158     li->nc  = 0;
1159     li->ncc = 0;
1160     /* Zero the thread index ranges.
1161      * Otherwise without local constraints we could return with old ranges.
1162      */
1163     for (i = 0; i < li->nth; i++)
1164     {
1165         li->th[i].b0   = 0;
1166         li->th[i].b1   = 0;
1167         li->th[i].nind = 0;
1168     }
1169     if (li->nth > 1)
1170     {
1171         li->th[li->nth].nind = 0;
1172     }
1173
1174     /* This is the local topology, so there are only F_CONSTR constraints */
1175     if (idef->il[F_CONSTR].nr == 0)
1176     {
1177         /* There are no constraints,
1178          * we do not need to fill any data structures.
1179          */
1180         return;
1181     }
1182
1183     if (debug)
1184     {
1185         fprintf(debug, "Building the LINCS connectivity\n");
1186     }
1187
1188     if (DOMAINDECOMP(cr))
1189     {
1190         if (cr->dd->constraints)
1191         {
1192             dd_get_constraint_range(cr->dd, &start, &natoms);
1193         }
1194         else
1195         {
1196             natoms = cr->dd->nat_home;
1197         }
1198         start = 0;
1199     }
1200     else
1201     {
1202         start  = 0;
1203         natoms = md->homenr;
1204     }
1205     at2con = make_at2con(start, natoms, idef->il, idef->iparams, bDynamics,
1206                          &nflexcon);
1207
1208
1209     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1210     {
1211         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1212         srenew(li->bllen0, li->nc_alloc);
1213         srenew(li->ddist, li->nc_alloc);
1214         srenew(li->bla, 2*li->nc_alloc);
1215         srenew(li->blc, li->nc_alloc);
1216         srenew(li->blc1, li->nc_alloc);
1217         srenew(li->blnr, li->nc_alloc+1);
1218         srenew(li->bllen, li->nc_alloc);
1219         srenew(li->tmpv, li->nc_alloc);
1220         srenew(li->tmp1, li->nc_alloc);
1221         srenew(li->tmp2, li->nc_alloc);
1222         srenew(li->tmp3, li->nc_alloc);
1223         srenew(li->tmp4, li->nc_alloc);
1224         srenew(li->mlambda, li->nc_alloc);
1225         if (li->ncg_triangle > 0)
1226         {
1227             /* This is allocating too much, but it is difficult to improve */
1228             srenew(li->triangle, li->nc_alloc);
1229             srenew(li->tri_bits, li->nc_alloc);
1230         }
1231     }
1232
1233     iatom = idef->il[F_CONSTR].iatoms;
1234
1235     ncc_alloc   = li->ncc_alloc;
1236     li->blnr[0] = 0;
1237
1238     ni = idef->il[F_CONSTR].nr/3;
1239
1240     con           = 0;
1241     nconnect      = 0;
1242     li->blnr[con] = nconnect;
1243     for (i = 0; i < ni; i++)
1244     {
1245         bLocal = TRUE;
1246         type   = iatom[3*i];
1247         a1     = iatom[3*i+1];
1248         a2     = iatom[3*i+2];
1249         lenA   = idef->iparams[type].constr.dA;
1250         lenB   = idef->iparams[type].constr.dB;
1251         /* Skip the flexible constraints when not doing dynamics */
1252         if (bDynamics || lenA != 0 || lenB != 0)
1253         {
1254             li->bllen0[con]  = lenA;
1255             li->ddist[con]   = lenB - lenA;
1256             /* Set the length to the topology A length */
1257             li->bllen[con]   = li->bllen0[con];
1258             li->bla[2*con]   = a1;
1259             li->bla[2*con+1] = a2;
1260             /* Construct the constraint connection matrix blbnb */
1261             for (k = at2con.index[a1-start]; k < at2con.index[a1-start+1]; k++)
1262             {
1263                 concon = at2con.a[k];
1264                 if (concon != i)
1265                 {
1266                     if (nconnect >= ncc_alloc)
1267                     {
1268                         ncc_alloc = over_alloc_small(nconnect+1);
1269                         srenew(li->blbnb, ncc_alloc);
1270                     }
1271                     li->blbnb[nconnect++] = concon;
1272                 }
1273             }
1274             for (k = at2con.index[a2-start]; k < at2con.index[a2-start+1]; k++)
1275             {
1276                 concon = at2con.a[k];
1277                 if (concon != i)
1278                 {
1279                     if (nconnect+1 > ncc_alloc)
1280                     {
1281                         ncc_alloc = over_alloc_small(nconnect+1);
1282                         srenew(li->blbnb, ncc_alloc);
1283                     }
1284                     li->blbnb[nconnect++] = concon;
1285                 }
1286             }
1287             li->blnr[con+1] = nconnect;
1288
1289             if (cr->dd == NULL)
1290             {
1291                 /* Order the blbnb matrix to optimize memory access */
1292                 qsort(&(li->blbnb[li->blnr[con]]), li->blnr[con+1]-li->blnr[con],
1293                       sizeof(li->blbnb[0]), int_comp);
1294             }
1295             /* Increase the constraint count */
1296             con++;
1297         }
1298     }
1299
1300     done_blocka(&at2con);
1301
1302     /* This is the real number of constraints,
1303      * without dynamics the flexible constraints are not present.
1304      */
1305     li->nc = con;
1306
1307     li->ncc = li->blnr[con];
1308     if (cr->dd == NULL)
1309     {
1310         /* Since the matrix is static, we can free some memory */
1311         ncc_alloc = li->ncc;
1312         srenew(li->blbnb, ncc_alloc);
1313     }
1314
1315     if (ncc_alloc > li->ncc_alloc)
1316     {
1317         li->ncc_alloc = ncc_alloc;
1318         srenew(li->blmf, li->ncc_alloc);
1319         srenew(li->blmf1, li->ncc_alloc);
1320         srenew(li->tmpncc, li->ncc_alloc);
1321     }
1322
1323     if (debug)
1324     {
1325         fprintf(debug, "Number of constraints is %d, couplings %d\n",
1326                 li->nc, li->ncc);
1327     }
1328
1329     if (li->nth == 1)
1330     {
1331         li->th[0].b0 = 0;
1332         li->th[0].b1 = li->nc;
1333     }
1334     else
1335     {
1336         lincs_thread_setup(li, md->nr);
1337     }
1338
1339     set_lincs_matrix(li, md->invmass, md->lambda);
1340 }
1341
1342 static void lincs_warning(FILE *fplog,
1343                           gmx_domdec_t *dd, rvec *x, rvec *xprime, t_pbc *pbc,
1344                           int ncons, int *bla, real *bllen, real wangle,
1345                           int maxwarn, int *warncount)
1346 {
1347     int  b, i, j;
1348     rvec v0, v1;
1349     real wfac, d0, d1, cosine;
1350     char buf[STRLEN];
1351
1352     wfac = cos(DEG2RAD*wangle);
1353
1354     sprintf(buf, "bonds that rotated more than %g degrees:\n"
1355             " atom 1 atom 2  angle  previous, current, constraint length\n",
1356             wangle);
1357     fprintf(stderr, "%s", buf);
1358     if (fplog)
1359     {
1360         fprintf(fplog, "%s", buf);
1361     }
1362
1363     for (b = 0; b < ncons; b++)
1364     {
1365         i = bla[2*b];
1366         j = bla[2*b+1];
1367         if (pbc)
1368         {
1369             pbc_dx_aiuc(pbc, x[i], x[j], v0);
1370             pbc_dx_aiuc(pbc, xprime[i], xprime[j], v1);
1371         }
1372         else
1373         {
1374             rvec_sub(x[i], x[j], v0);
1375             rvec_sub(xprime[i], xprime[j], v1);
1376         }
1377         d0     = norm(v0);
1378         d1     = norm(v1);
1379         cosine = iprod(v0, v1)/(d0*d1);
1380         if (cosine < wfac)
1381         {
1382             sprintf(buf, " %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1383                     ddglatnr(dd, i), ddglatnr(dd, j),
1384                     RAD2DEG*acos(cosine), d0, d1, bllen[b]);
1385             fprintf(stderr, "%s", buf);
1386             if (fplog)
1387             {
1388                 fprintf(fplog, "%s", buf);
1389             }
1390             if (!gmx_isfinite(d1))
1391             {
1392                 gmx_fatal(FARGS, "Bond length not finite.");
1393             }
1394
1395             (*warncount)++;
1396         }
1397     }
1398     if (*warncount > maxwarn)
1399     {
1400         too_many_constraint_warnings(econtLINCS, *warncount);
1401     }
1402 }
1403
1404 static void cconerr(gmx_domdec_t *dd,
1405                     int ncons, int *bla, real *bllen, rvec *x, t_pbc *pbc,
1406                     real *ncons_loc, real *ssd, real *max, int *imax)
1407 {
1408     real       len, d, ma, ssd2, r2;
1409     int       *nlocat, count, b, im;
1410     rvec       dx;
1411
1412     if (dd && dd->constraints)
1413     {
1414         nlocat = dd_constraints_nlocalatoms(dd);
1415     }
1416     else
1417     {
1418         nlocat = 0;
1419     }
1420
1421     ma    = 0;
1422     ssd2  = 0;
1423     im    = 0;
1424     count = 0;
1425     for (b = 0; b < ncons; b++)
1426     {
1427         if (pbc)
1428         {
1429             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
1430         }
1431         else
1432         {
1433             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
1434         }
1435         r2  = norm2(dx);
1436         len = r2*gmx_invsqrt(r2);
1437         d   = fabs(len/bllen[b]-1);
1438         if (d > ma && (nlocat == NULL || nlocat[b]))
1439         {
1440             ma = d;
1441             im = b;
1442         }
1443         if (nlocat == NULL)
1444         {
1445             ssd2 += d*d;
1446             count++;
1447         }
1448         else
1449         {
1450             ssd2  += nlocat[b]*d*d;
1451             count += nlocat[b];
1452         }
1453     }
1454
1455     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1456     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1457     *max       = ma;
1458     *imax      = im;
1459 }
1460
1461 gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner,
1462                          t_inputrec *ir,
1463                          gmx_int64_t step,
1464                          struct gmx_lincsdata *lincsd, t_mdatoms *md,
1465                          t_commrec *cr,
1466                          rvec *x, rvec *xprime, rvec *min_proj,
1467                          matrix box, t_pbc *pbc,
1468                          real lambda, real *dvdlambda,
1469                          real invdt, rvec *v,
1470                          gmx_bool bCalcVir, tensor vir_r_m_dr,
1471                          int econq,
1472                          t_nrnb *nrnb,
1473                          int maxwarn, int *warncount)
1474 {
1475     char      buf[STRLEN], buf2[22], buf3[STRLEN];
1476     int       i, warn, p_imax, error;
1477     real      ncons_loc, p_ssd, p_max = 0;
1478     rvec      dx;
1479     gmx_bool  bOK;
1480
1481     bOK = TRUE;
1482
1483     if (lincsd->nc == 0 && cr->dd == NULL)
1484     {
1485         if (bLog || bEner)
1486         {
1487             lincsd->rmsd_data[0] = 0;
1488             if (ir->eI == eiSD2 && v == NULL)
1489             {
1490                 i = 2;
1491             }
1492             else
1493             {
1494                 i = 1;
1495             }
1496             lincsd->rmsd_data[i] = 0;
1497         }
1498
1499         return bOK;
1500     }
1501
1502     if (econq == econqCoord)
1503     {
1504         if (ir->efep != efepNO)
1505         {
1506             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1507             {
1508                 set_lincs_matrix(lincsd, md->invmass, md->lambda);
1509             }
1510
1511             for (i = 0; i < lincsd->nc; i++)
1512             {
1513                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1514             }
1515         }
1516
1517         if (lincsd->ncg_flex)
1518         {
1519             /* Set the flexible constraint lengths to the old lengths */
1520             if (pbc != NULL)
1521             {
1522                 for (i = 0; i < lincsd->nc; i++)
1523                 {
1524                     if (lincsd->bllen[i] == 0)
1525                     {
1526                         pbc_dx_aiuc(pbc, x[lincsd->bla[2*i]], x[lincsd->bla[2*i+1]], dx);
1527                         lincsd->bllen[i] = norm(dx);
1528                     }
1529                 }
1530             }
1531             else
1532             {
1533                 for (i = 0; i < lincsd->nc; i++)
1534                 {
1535                     if (lincsd->bllen[i] == 0)
1536                     {
1537                         lincsd->bllen[i] =
1538                             sqrt(distance2(x[lincsd->bla[2*i]],
1539                                            x[lincsd->bla[2*i+1]]));
1540                     }
1541                 }
1542             }
1543         }
1544
1545         if (bLog && fplog)
1546         {
1547             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1548                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1549         }
1550
1551         /* This warn var can be updated by multiple threads
1552          * at the same time. But as we only need to detect
1553          * if a warning occured or not, this is not an issue.
1554          */
1555         warn = -1;
1556
1557         /* The OpenMP parallel region of constrain_lincs for coords */
1558 #pragma omp parallel num_threads(lincsd->nth)
1559         {
1560             int th = gmx_omp_get_thread_num();
1561
1562             clear_mat(lincsd->th[th].vir_r_m_dr);
1563
1564             do_lincs(x, xprime, box, pbc, lincsd, th,
1565                      md->invmass, cr,
1566                      bCalcVir || (ir->efep != efepNO),
1567                      ir->LincsWarnAngle, &warn,
1568                      invdt, v, bCalcVir,
1569                      th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1570         }
1571
1572         if (ir->efep != efepNO)
1573         {
1574             real dt_2, dvdl = 0;
1575
1576             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
1577             for (i = 0; (i < lincsd->nc); i++)
1578             {
1579                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
1580             }
1581             *dvdlambda += dvdl;
1582         }
1583
1584         if (bLog && fplog && lincsd->nc > 0)
1585         {
1586             fprintf(fplog, "   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1587             fprintf(fplog, "       Before LINCS          %.6f    %.6f %6d %6d\n",
1588                     sqrt(p_ssd/ncons_loc), p_max,
1589                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1590                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1591         }
1592         if (bLog || bEner)
1593         {
1594             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1595                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1596             /* Check if we are doing the second part of SD */
1597             if (ir->eI == eiSD2 && v == NULL)
1598             {
1599                 i = 2;
1600             }
1601             else
1602             {
1603                 i = 1;
1604             }
1605             lincsd->rmsd_data[0] = ncons_loc;
1606             lincsd->rmsd_data[i] = p_ssd;
1607         }
1608         else
1609         {
1610             lincsd->rmsd_data[0] = 0;
1611             lincsd->rmsd_data[1] = 0;
1612             lincsd->rmsd_data[2] = 0;
1613         }
1614         if (bLog && fplog && lincsd->nc > 0)
1615         {
1616             fprintf(fplog,
1617                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1618                     sqrt(p_ssd/ncons_loc), p_max,
1619                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1620                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1621         }
1622
1623         if (warn >= 0)
1624         {
1625             if (maxwarn >= 0)
1626             {
1627                 cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1628                         &ncons_loc, &p_ssd, &p_max, &p_imax);
1629                 if (MULTISIM(cr))
1630                 {
1631                     sprintf(buf3, " in simulation %d", cr->ms->sim);
1632                 }
1633                 else
1634                 {
1635                     buf3[0] = 0;
1636                 }
1637                 sprintf(buf, "\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1638                         "relative constraint deviation after LINCS:\n"
1639                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1640                         gmx_step_str(step, buf2), ir->init_t+step*ir->delta_t,
1641                         buf3,
1642                         sqrt(p_ssd/ncons_loc), p_max,
1643                         ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1644                         ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1645                 if (fplog)
1646                 {
1647                     fprintf(fplog, "%s", buf);
1648                 }
1649                 fprintf(stderr, "%s", buf);
1650                 lincs_warning(fplog, cr->dd, x, xprime, pbc,
1651                               lincsd->nc, lincsd->bla, lincsd->bllen,
1652                               ir->LincsWarnAngle, maxwarn, warncount);
1653             }
1654             bOK = (p_max < 0.5);
1655         }
1656
1657         if (lincsd->ncg_flex)
1658         {
1659             for (i = 0; (i < lincsd->nc); i++)
1660             {
1661                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1662                 {
1663                     lincsd->bllen[i] = 0;
1664                 }
1665             }
1666         }
1667     }
1668     else
1669     {
1670         /* The OpenMP parallel region of constrain_lincs for derivatives */
1671 #pragma omp parallel num_threads(lincsd->nth)
1672         {
1673             int th = gmx_omp_get_thread_num();
1674
1675             do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
1676                       md->invmass, econq, ir->efep != efepNO ? dvdlambda : NULL,
1677                       bCalcVir, th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1678         }
1679     }
1680
1681     if (bCalcVir && lincsd->nth > 1)
1682     {
1683         for (i = 1; i < lincsd->nth; i++)
1684         {
1685             m_add(vir_r_m_dr, lincsd->th[i].vir_r_m_dr, vir_r_m_dr);
1686         }
1687     }
1688
1689     /* count assuming nit=1 */
1690     inc_nrnb(nrnb, eNR_LINCS, lincsd->nc);
1691     inc_nrnb(nrnb, eNR_LINCSMAT, (2+lincsd->nOrder)*lincsd->ncc);
1692     if (lincsd->ntriangle > 0)
1693     {
1694         inc_nrnb(nrnb, eNR_LINCSMAT, lincsd->nOrder*lincsd->ncc_triangle);
1695     }
1696     if (v)
1697     {
1698         inc_nrnb(nrnb, eNR_CONSTR_V, lincsd->nc*2);
1699     }
1700     if (bCalcVir)
1701     {
1702         inc_nrnb(nrnb, eNR_CONSTR_VIR, lincsd->nc);
1703     }
1704
1705     return bOK;
1706 }