src/gromacs/mdlib/clincs.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #include "gmxpre.h"
  39
  40 #include <math.h>
  41 #include <stdlib.h>
  42
  43 #include "gromacs/domdec/domdec.h"
  44 #include "gromacs/fileio/gmxfio.h"
  45 #include "gromacs/legacyheaders/constr.h"
  46 #include "gromacs/legacyheaders/copyrite.h"
  47 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
  48 #include "gromacs/legacyheaders/mdrun.h"
  49 #include "gromacs/legacyheaders/nrnb.h"
  50 #include "gromacs/legacyheaders/types/commrec.h"
  51 #include "gromacs/math/units.h"
  52 #include "gromacs/math/vec.h"
  53 #include "gromacs/mdlib/bitmask.h"
  54 #include "gromacs/pbcutil/pbc.h"
  55 #include "gromacs/topology/block.h"
  56 #include "gromacs/topology/mtop_util.h"
  57 #include "gromacs/utility/fatalerror.h"
  58 #include "gromacs/utility/gmxomp.h"
  59 #include "gromacs/utility/smalloc.h"
  60
  61 typedef struct {
  62     int    b0;         /* first constraint for this thread */
  63     int    b1;         /* b1-1 is the last constraint for this thread */
  64     int    nind;       /* number of indices */
  65     int   *ind;        /* constraint index for updating atom data */
  66     int    nind_r;     /* number of indices */
  67     int   *ind_r;      /* constraint index for updating atom data */
  68     int    ind_nalloc; /* allocation size of ind and ind_r */
  69     tensor vir_r_m_dr; /* temporary variable for virial calculation */
  70     real   dhdlambda;  /* temporary variable for lambda derivative */
  71 } lincs_thread_t;
  72
  73 typedef struct gmx_lincsdata {
  74     int             ncg;          /* the global number of constraints */
  75     int             ncg_flex;     /* the global number of flexible constraints */
  76     int             ncg_triangle; /* the global number of constraints in triangles */
  77     int             nIter;        /* the number of iterations */
  78     int             nOrder;       /* the order of the matrix expansion */
  79     int             nc;           /* the number of constraints */
  80     int             nc_alloc;     /* the number we allocated memory for */
  81     int             ncc;          /* the number of constraint connections */
  82     int             ncc_alloc;    /* the number we allocated memory for */
  83     real            matlam;       /* the FE lambda value used for filling blc and blmf */
  84     real           *bllen0;       /* the reference distance in topology A */
  85     real           *ddist;        /* the reference distance in top B - the r.d. in top A */
  86     int            *bla;          /* the atom pairs involved in the constraints */
  87     real           *blc;          /* 1/sqrt(invmass1 + invmass2) */
  88     real           *blc1;         /* as blc, but with all masses 1 */
  89     int            *blnr;         /* index into blbnb and blmf */
  90     int            *blbnb;        /* list of constraint connections */
  91     int             ntriangle;    /* the local number of constraints in triangles */
  92     int            *triangle;     /* the list of triangle constraints */
  93     int            *tri_bits;     /* the bits tell if the matrix element should be used */
  94     int             ncc_triangle; /* the number of constraint connections in triangles */
  95     gmx_bool        bCommIter;    /* communicate before each LINCS interation */
  96     real           *blmf;         /* matrix of mass factors for constraint connections */
  97     real           *blmf1;        /* as blmf, but with all masses 1 */
  98     real           *bllen;        /* the reference bond length */
  99     int             nth;          /* The number of threads doing LINCS */
 100     lincs_thread_t *th;           /* LINCS thread division */
 101     gmx_bitmask_t  *atf;          /* atom flags for thread parallelization */
 102     int             atf_nalloc;   /* allocation size of atf */
 103     /* arrays for temporary storage in the LINCS algorithm */
 104     rvec           *tmpv;
 105     real           *tmpncc;
 106     real           *tmp1;
 107     real           *tmp2;
 108     real           *tmp3;
 109     real           *tmp4;
 110     real           *mlambda; /* the Lagrange multipliers * -1 */
 111     /* storage for the constraint RMS relative deviation output */
 112     real            rmsd_data[3];
 113 } t_gmx_lincsdata;
 114
 115 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 116 {
 117     return lincsd->rmsd_data;
 118 }
 119
 120 real lincs_rmsd(struct gmx_lincsdata *lincsd, gmx_bool bSD2)
 121 {
 122     if (lincsd->rmsd_data[0] > 0)
 123     {
 124         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 125     }
 126     else
 127     {
 128         return 0;
 129     }
 130 }
 131
 132 /* Do a set of nrec LINCS matrix multiplications.
 133  * This function will return with up to date thread-local
 134  * constraint data, without an OpenMP barrier.
 135  */
 136 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 137                                 int b0, int b1,
 138                                 const real *blcc,
 139                                 real *rhs1, real *rhs2, real *sol)
 140 {
 141     int        nrec, rec, b, j, n, nr0, nr1;
 142     real       mvb, *swap;
 143     int        ntriangle, tb, bits;
 144     const int *blnr     = lincsd->blnr, *blbnb = lincsd->blbnb;
 145     const int *triangle = lincsd->triangle, *tri_bits = lincsd->tri_bits;
 146
 147     ntriangle = lincsd->ntriangle;
 148     nrec      = lincsd->nOrder;
 149
 150     for (rec = 0; rec < nrec; rec++)
 151     {
 152 #pragma omp barrier
 153         for (b = b0; b < b1; b++)
 154         {
 155             mvb = 0;
 156             for (n = blnr[b]; n < blnr[b+1]; n++)
 157             {
 158                 j   = blbnb[n];
 159                 mvb = mvb + blcc[n]*rhs1[j];
 160             }
 161             rhs2[b] = mvb;
 162             sol[b]  = sol[b] + mvb;
 163         }
 164         swap = rhs1;
 165         rhs1 = rhs2;
 166         rhs2 = swap;
 167     } /* nrec*(ncons+2*nrtot) flops */
 168
 169     if (ntriangle > 0)
 170     {
 171         /* Perform an extra nrec recursions for only the constraints
 172          * involved in rigid triangles.
 173          * In this way their accuracy should come close to those of the other
 174          * constraints, since traingles of constraints can produce eigenvalues
 175          * around 0.7, while the effective eigenvalue for bond constraints
 176          * is around 0.4 (and 0.7*0.7=0.5).
 177          */
 178         /* We need to copy the temporary array, since only the elements
 179          * for constraints involved in triangles are updated and then
 180          * the pointers are swapped. This saving copying the whole arrary.
 181          * We need barrier as other threads might still be reading from rhs2.
 182          */
 183 #pragma omp barrier
 184         for (b = b0; b < b1; b++)
 185         {
 186             rhs2[b] = rhs1[b];
 187         }
 188 #pragma omp barrier
 189 #pragma omp master
 190         {
 191             for (rec = 0; rec < nrec; rec++)
 192             {
 193                 for (tb = 0; tb < ntriangle; tb++)
 194                 {
 195                     b    = triangle[tb];
 196                     bits = tri_bits[tb];
 197                     mvb  = 0;
 198                     nr0  = blnr[b];
 199                     nr1  = blnr[b+1];
 200                     for (n = nr0; n < nr1; n++)
 201                     {
 202                         if (bits & (1<<(n-nr0)))
 203                         {
 204                             j   = blbnb[n];
 205                             mvb = mvb + blcc[n]*rhs1[j];
 206                         }
 207                     }
 208                     rhs2[b] = mvb;
 209                     sol[b]  = sol[b] + mvb;
 210                 }
 211                 swap = rhs1;
 212                 rhs1 = rhs2;
 213                 rhs2 = swap;
 214             }
 215         } /* flops count is missing here */
 216
 217         /* We need a barrier here as the calling routine will continue
 218          * to operate on the thread-local constraints without barrier.
 219          */
 220 #pragma omp barrier
 221     }
 222 }
 223
 224 static void lincs_update_atoms_noind(int ncons, const int *bla,
 225                                      real prefac,
 226                                      const real *fac, rvec *r,
 227                                      const real *invmass,
 228                                      rvec *x)
 229 {
 230     int  b, i, j;
 231     real mvb, im1, im2, tmp0, tmp1, tmp2;
 232
 233     if (invmass != NULL)
 234     {
 235         for (b = 0; b < ncons; b++)
 236         {
 237             i        = bla[2*b];
 238             j        = bla[2*b+1];
 239             mvb      = prefac*fac[b];
 240             im1      = invmass[i];
 241             im2      = invmass[j];
 242             tmp0     = r[b][0]*mvb;
 243             tmp1     = r[b][1]*mvb;
 244             tmp2     = r[b][2]*mvb;
 245             x[i][0] -= tmp0*im1;
 246             x[i][1] -= tmp1*im1;
 247             x[i][2] -= tmp2*im1;
 248             x[j][0] += tmp0*im2;
 249             x[j][1] += tmp1*im2;
 250             x[j][2] += tmp2*im2;
 251         } /* 16 ncons flops */
 252     }
 253     else
 254     {
 255         for (b = 0; b < ncons; b++)
 256         {
 257             i        = bla[2*b];
 258             j        = bla[2*b+1];
 259             mvb      = prefac*fac[b];
 260             tmp0     = r[b][0]*mvb;
 261             tmp1     = r[b][1]*mvb;
 262             tmp2     = r[b][2]*mvb;
 263             x[i][0] -= tmp0;
 264             x[i][1] -= tmp1;
 265             x[i][2] -= tmp2;
 266             x[j][0] += tmp0;
 267             x[j][1] += tmp1;
 268             x[j][2] += tmp2;
 269         }
 270     }
 271 }
 272
 273 static void lincs_update_atoms_ind(int ncons, const int *ind, const int *bla,
 274                                    real prefac,
 275                                    const real *fac, rvec *r,
 276                                    const real *invmass,
 277                                    rvec *x)
 278 {
 279     int  bi, b, i, j;
 280     real mvb, im1, im2, tmp0, tmp1, tmp2;
 281
 282     if (invmass != NULL)
 283     {
 284         for (bi = 0; bi < ncons; bi++)
 285         {
 286             b        = ind[bi];
 287             i        = bla[2*b];
 288             j        = bla[2*b+1];
 289             mvb      = prefac*fac[b];
 290             im1      = invmass[i];
 291             im2      = invmass[j];
 292             tmp0     = r[b][0]*mvb;
 293             tmp1     = r[b][1]*mvb;
 294             tmp2     = r[b][2]*mvb;
 295             x[i][0] -= tmp0*im1;
 296             x[i][1] -= tmp1*im1;
 297             x[i][2] -= tmp2*im1;
 298             x[j][0] += tmp0*im2;
 299             x[j][1] += tmp1*im2;
 300             x[j][2] += tmp2*im2;
 301         } /* 16 ncons flops */
 302     }
 303     else
 304     {
 305         for (bi = 0; bi < ncons; bi++)
 306         {
 307             b        = ind[bi];
 308             i        = bla[2*b];
 309             j        = bla[2*b+1];
 310             mvb      = prefac*fac[b];
 311             tmp0     = r[b][0]*mvb;
 312             tmp1     = r[b][1]*mvb;
 313             tmp2     = r[b][2]*mvb;
 314             x[i][0] -= tmp0;
 315             x[i][1] -= tmp1;
 316             x[i][2] -= tmp2;
 317             x[j][0] += tmp0;
 318             x[j][1] += tmp1;
 319             x[j][2] += tmp2;
 320         } /* 16 ncons flops */
 321     }
 322 }
 323
 324 static void lincs_update_atoms(struct gmx_lincsdata *li, int th,
 325                                real prefac,
 326                                const real *fac, rvec *r,
 327                                const real *invmass,
 328                                rvec *x)
 329 {
 330     if (li->nth == 1)
 331     {
 332         /* Single thread, we simply update for all constraints */
 333         lincs_update_atoms_noind(li->nc, li->bla, prefac, fac, r, invmass, x);
 334     }
 335     else
 336     {
 337         /* Update the atom vector components for our thread local
 338          * constraints that only access our local atom range.
 339          * This can be done without a barrier.
 340          */
 341         lincs_update_atoms_ind(li->th[th].nind, li->th[th].ind,
 342                                li->bla, prefac, fac, r, invmass, x);
 343
 344         if (li->th[li->nth].nind > 0)
 345         {
 346             /* Update the constraints that operate on atoms
 347              * in multiple thread atom blocks on the master thread.
 348              */
 349 #pragma omp barrier
 350 #pragma omp master
 351             {
 352                 lincs_update_atoms_ind(li->th[li->nth].nind,
 353                                        li->th[li->nth].ind,
 354                                        li->bla, prefac, fac, r, invmass, x);
 355             }
 356         }
 357     }
 358 }
 359
 360 /* LINCS projection, works on derivatives of the coordinates */
 361 static void do_lincsp(rvec *x, rvec *f, rvec *fp, t_pbc *pbc,
 362                       struct gmx_lincsdata *lincsd, int th,
 363                       real *invmass,
 364                       int econq, gmx_bool bCalcDHDL,
 365                       gmx_bool bCalcVir, tensor rmdf)
 366 {
 367     int      b0, b1, b, i, j, k, n;
 368     real     tmp0, tmp1, tmp2, mvb;
 369     rvec     dx;
 370     int     *bla, *blnr, *blbnb;
 371     rvec    *r;
 372     real    *blc, *blmf, *blcc, *rhs1, *rhs2, *sol;
 373
 374     b0 = lincsd->th[th].b0;
 375     b1 = lincsd->th[th].b1;
 376
 377     bla    = lincsd->bla;
 378     r      = lincsd->tmpv;
 379     blnr   = lincsd->blnr;
 380     blbnb  = lincsd->blbnb;
 381     if (econq != econqForce)
 382     {
 383         /* Use mass-weighted parameters */
 384         blc  = lincsd->blc;
 385         blmf = lincsd->blmf;
 386     }
 387     else
 388     {
 389         /* Use non mass-weighted parameters */
 390         blc  = lincsd->blc1;
 391         blmf = lincsd->blmf1;
 392     }
 393     blcc   = lincsd->tmpncc;
 394     rhs1   = lincsd->tmp1;
 395     rhs2   = lincsd->tmp2;
 396     sol    = lincsd->tmp3;
 397
 398     /* Compute normalized i-j vectors */
 399     if (pbc)
 400     {
 401         for (b = b0; b < b1; b++)
 402         {
 403             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 404             unitv(dx, r[b]);
 405         }
 406     }
 407     else
 408     {
 409         for (b = b0; b < b1; b++)
 410         {
 411             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
 412             unitv(dx, r[b]);
 413         } /* 16 ncons flops */
 414     }
 415
 416 #pragma omp barrier
 417     for (b = b0; b < b1; b++)
 418     {
 419         tmp0 = r[b][0];
 420         tmp1 = r[b][1];
 421         tmp2 = r[b][2];
 422         i    = bla[2*b];
 423         j    = bla[2*b+1];
 424         for (n = blnr[b]; n < blnr[b+1]; n++)
 425         {
 426             k       = blbnb[n];
 427             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 428         } /* 6 nr flops */
 429         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 430                       tmp1*(f[i][1] - f[j][1]) +
 431                       tmp2*(f[i][2] - f[j][2]));
 432         rhs1[b] = mvb;
 433         sol[b]  = mvb;
 434         /* 7 flops */
 435     }
 436     /* Together: 23*ncons + 6*nrtot flops */
 437
 438     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 439     /* nrec*(ncons+2*nrtot) flops */
 440
 441     if (econq == econqDeriv_FlexCon)
 442     {
 443         /* We only want to constraint the flexible constraints,
 444          * so we mask out the normal ones by setting sol to 0.
 445          */
 446         for (b = b0; b < b1; b++)
 447         {
 448             if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 449             {
 450                 sol[b] = 0;
 451             }
 452         }
 453     }
 454
 455     /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
 456     for (b = b0; b < b1; b++)
 457     {
 458         sol[b] *= blc[b];
 459     }
 460
 461     /* When constraining forces, we should not use mass weighting,
 462      * so we pass invmass=NULL, which results in the use of 1 for all atoms.
 463      */
 464     lincs_update_atoms(lincsd, th, 1.0, sol, r,
 465                        (econq != econqForce) ? invmass : NULL, fp);
 466
 467     if (bCalcDHDL)
 468     {
 469         real dhdlambda;
 470
 471         dhdlambda = 0;
 472         for (b = b0; b < b1; b++)
 473         {
 474             dhdlambda -= sol[b]*lincsd->ddist[b];
 475         }
 476
 477         lincsd->th[th].dhdlambda = dhdlambda;
 478     }
 479
 480     if (bCalcVir)
 481     {
 482         /* Constraint virial,
 483          * determines sum r_bond x delta f,
 484          * where delta f is the constraint correction
 485          * of the quantity that is being constrained.
 486          */
 487         for (b = b0; b < b1; b++)
 488         {
 489             mvb = lincsd->bllen[b]*sol[b];
 490             for (i = 0; i < DIM; i++)
 491             {
 492                 tmp1 = mvb*r[b][i];
 493                 for (j = 0; j < DIM; j++)
 494                 {
 495                     rmdf[i][j] += tmp1*r[b][j];
 496                 }
 497             }
 498         } /* 23 ncons flops */
 499     }
 500 }
 501
 502 static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
 503                      struct gmx_lincsdata *lincsd, int th,
 504                      real *invmass,
 505                      t_commrec *cr,
 506                      gmx_bool bCalcDHDL,
 507                      real wangle, int *warn,
 508                      real invdt, rvec *v,
 509                      gmx_bool bCalcVir, tensor vir_r_m_dr)
 510 {
 511     int      b0, b1, b, i, j, k, n, iter;
 512     real     tmp0, tmp1, tmp2, mvb, rlen, len, len2, dlen2, wfac;
 513     rvec     dx;
 514     int     *bla, *blnr, *blbnb;
 515     rvec    *r;
 516     real    *blc, *blmf, *bllen, *blcc, *rhs1, *rhs2, *sol, *blc_sol, *mlambda;
 517     int     *nlocat;
 518
 519     b0 = lincsd->th[th].b0;
 520     b1 = lincsd->th[th].b1;
 521
 522     bla     = lincsd->bla;
 523     r       = lincsd->tmpv;
 524     blnr    = lincsd->blnr;
 525     blbnb   = lincsd->blbnb;
 526     blc     = lincsd->blc;
 527     blmf    = lincsd->blmf;
 528     bllen   = lincsd->bllen;
 529     blcc    = lincsd->tmpncc;
 530     rhs1    = lincsd->tmp1;
 531     rhs2    = lincsd->tmp2;
 532     sol     = lincsd->tmp3;
 533     blc_sol = lincsd->tmp4;
 534     mlambda = lincsd->mlambda;
 535
 536     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 537     {
 538         nlocat = dd_constraints_nlocalatoms(cr->dd);
 539     }
 540     else
 541     {
 542         nlocat = NULL;
 543     }
 544
 545     if (pbc)
 546     {
 547         /* Compute normalized i-j vectors */
 548         for (b = b0; b < b1; b++)
 549         {
 550             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 551             unitv(dx, r[b]);
 552         }
 553 #pragma omp barrier
 554         for (b = b0; b < b1; b++)
 555         {
 556             for (n = blnr[b]; n < blnr[b+1]; n++)
 557             {
 558                 blcc[n] = blmf[n]*iprod(r[b], r[blbnb[n]]);
 559             }
 560             pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 561             mvb     = blc[b]*(iprod(r[b], dx) - bllen[b]);
 562             rhs1[b] = mvb;
 563             sol[b]  = mvb;
 564         }
 565     }
 566     else
 567     {
 568         /* Compute normalized i-j vectors */
 569         for (b = b0; b < b1; b++)
 570         {
 571             i       = bla[2*b];
 572             j       = bla[2*b+1];
 573             tmp0    = x[i][0] - x[j][0];
 574             tmp1    = x[i][1] - x[j][1];
 575             tmp2    = x[i][2] - x[j][2];
 576             rlen    = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 577             r[b][0] = rlen*tmp0;
 578             r[b][1] = rlen*tmp1;
 579             r[b][2] = rlen*tmp2;
 580         } /* 16 ncons flops */
 581
 582 #pragma omp barrier
 583         for (b = b0; b < b1; b++)
 584         {
 585             tmp0 = r[b][0];
 586             tmp1 = r[b][1];
 587             tmp2 = r[b][2];
 588             len  = bllen[b];
 589             i    = bla[2*b];
 590             j    = bla[2*b+1];
 591             for (n = blnr[b]; n < blnr[b+1]; n++)
 592             {
 593                 k       = blbnb[n];
 594                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 595             } /* 6 nr flops */
 596             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 597                           tmp1*(xp[i][1] - xp[j][1]) +
 598                           tmp2*(xp[i][2] - xp[j][2]) - len);
 599             rhs1[b] = mvb;
 600             sol[b]  = mvb;
 601             /* 10 flops */
 602         }
 603         /* Together: 26*ncons + 6*nrtot flops */
 604     }
 605
 606     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 607     /* nrec*(ncons+2*nrtot) flops */
 608
 609     for (b = b0; b < b1; b++)
 610     {
 611         mlambda[b] = blc[b]*sol[b];
 612     }
 613
 614     /* Update the coordinates */
 615     lincs_update_atoms(lincsd, th, 1.0, mlambda, r, invmass, xp);
 616
 617     /*
 618      ********  Correction for centripetal effects  ********
 619      */
 620
 621     wfac = cos(DEG2RAD*wangle);
 622     wfac = wfac*wfac;
 623
 624     for (iter = 0; iter < lincsd->nIter; iter++)
 625     {
 626         if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints))
 627         {
 628 #pragma omp barrier
 629 #pragma omp master
 630             {
 631                 /* Communicate the corrected non-local coordinates */
 632                 if (DOMAINDECOMP(cr))
 633                 {
 634                     dd_move_x_constraints(cr->dd, box, xp, NULL, FALSE);
 635                 }
 636             }
 637         }
 638
 639 #pragma omp barrier
 640         for (b = b0; b < b1; b++)
 641         {
 642             len = bllen[b];
 643             if (pbc)
 644             {
 645                 pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 646             }
 647             else
 648             {
 649                 rvec_sub(xp[bla[2*b]], xp[bla[2*b+1]], dx);
 650             }
 651             len2  = len*len;
 652             dlen2 = 2*len2 - norm2(dx);
 653             if (dlen2 < wfac*len2 && (nlocat == NULL || nlocat[b]))
 654             {
 655                 /* not race free - see detailed comment in caller */
 656                 *warn = b;
 657             }
 658             if (dlen2 > 0)
 659             {
 660                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 661             }
 662             else
 663             {
 664                 mvb = blc[b]*len;
 665             }
 666             rhs1[b] = mvb;
 667             sol[b]  = mvb;
 668         } /* 20*ncons flops */
 669
 670         lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 671         /* nrec*(ncons+2*nrtot) flops */
 672
 673         for (b = b0; b < b1; b++)
 674         {
 675             mvb         = blc[b]*sol[b];
 676             blc_sol[b]  = mvb;
 677             mlambda[b] += mvb;
 678         }
 679
 680         /* Update the coordinates */
 681         lincs_update_atoms(lincsd, th, 1.0, blc_sol, r, invmass, xp);
 682     }
 683     /* nit*ncons*(37+9*nrec) flops */
 684
 685     if (v != NULL)
 686     {
 687         /* Update the velocities */
 688         lincs_update_atoms(lincsd, th, invdt, mlambda, r, invmass, v);
 689         /* 16 ncons flops */
 690     }
 691
 692     if (nlocat != NULL && (bCalcDHDL || bCalcVir))
 693     {
 694         /* In lincs_update_atoms threads might cross-read mlambda */
 695 #pragma omp barrier
 696
 697         /* Only account for local atoms */
 698         for (b = b0; b < b1; b++)
 699         {
 700             mlambda[b] *= 0.5*nlocat[b];
 701         }
 702     }
 703
 704     if (bCalcDHDL)
 705     {
 706         real dhdl;
 707
 708         dhdl = 0;
 709         for (b = b0; b < b1; b++)
 710         {
 711             /* Note that this this is dhdl*dt^2, the dt^2 factor is corrected
 712              * later after the contributions are reduced over the threads.
 713              */
 714             dhdl -= lincsd->mlambda[b]*lincsd->ddist[b];
 715         }
 716         lincsd->th[th].dhdlambda = dhdl;
 717     }
 718
 719     if (bCalcVir)
 720     {
 721         /* Constraint virial */
 722         for (b = b0; b < b1; b++)
 723         {
 724             tmp0 = -bllen[b]*mlambda[b];
 725             for (i = 0; i < DIM; i++)
 726             {
 727                 tmp1 = tmp0*r[b][i];
 728                 for (j = 0; j < DIM; j++)
 729                 {
 730                     vir_r_m_dr[i][j] -= tmp1*r[b][j];
 731                 }
 732             }
 733         } /* 22 ncons flops */
 734     }
 735
 736     /* Total:
 737      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 738      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 739      *
 740      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 741      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 742      * if nit=1
 743      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 744      */
 745 }
 746
 747 void set_lincs_matrix(struct gmx_lincsdata *li, real *invmass, real lambda)
 748 {
 749     int        i, a1, a2, n, k, sign, center;
 750     int        end, nk, kk;
 751     const real invsqrt2 = 0.7071067811865475244;
 752
 753     for (i = 0; (i < li->nc); i++)
 754     {
 755         a1          = li->bla[2*i];
 756         a2          = li->bla[2*i+1];
 757         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 758         li->blc1[i] = invsqrt2;
 759     }
 760
 761     /* Construct the coupling coefficient matrix blmf */
 762     li->ntriangle    = 0;
 763     li->ncc_triangle = 0;
 764     for (i = 0; (i < li->nc); i++)
 765     {
 766         a1 = li->bla[2*i];
 767         a2 = li->bla[2*i+1];
 768         for (n = li->blnr[i]; (n < li->blnr[i+1]); n++)
 769         {
 770             k = li->blbnb[n];
 771             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 772             {
 773                 sign = -1;
 774             }
 775             else
 776             {
 777                 sign = 1;
 778             }
 779             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 780             {
 781                 center = a1;
 782                 end    = a2;
 783             }
 784             else
 785             {
 786                 center = a2;
 787                 end    = a1;
 788             }
 789             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 790             li->blmf1[n] = sign*0.5;
 791             if (li->ncg_triangle > 0)
 792             {
 793                 /* Look for constraint triangles */
 794                 for (nk = li->blnr[k]; (nk < li->blnr[k+1]); nk++)
 795                 {
 796                     kk = li->blbnb[nk];
 797                     if (kk != i && kk != k &&
 798                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 799                     {
 800                         if (li->ntriangle == 0 ||
 801                             li->triangle[li->ntriangle-1] < i)
 802                         {
 803                             /* Add this constraint to the triangle list */
 804                             li->triangle[li->ntriangle] = i;
 805                             li->tri_bits[li->ntriangle] = 0;
 806                             li->ntriangle++;
 807                             if (li->blnr[i+1] - li->blnr[i] > static_cast<int>(sizeof(li->tri_bits[0])*8 - 1))
 808                             {
 809                                 gmx_fatal(FARGS, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 810                                           li->blnr[i+1] - li->blnr[i],
 811                                           sizeof(li->tri_bits[0])*8-1);
 812                             }
 813                         }
 814                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 815                         li->ncc_triangle++;
 816                     }
 817                 }
 818             }
 819         }
 820     }
 821
 822     if (debug)
 823     {
 824         fprintf(debug, "Of the %d constraints %d participate in triangles\n",
 825                 li->nc, li->ntriangle);
 826         fprintf(debug, "There are %d couplings of which %d in triangles\n",
 827                 li->ncc, li->ncc_triangle);
 828     }
 829
 830     /* Set matlam,
 831      * so we know with which lambda value the masses have been set.
 832      */
 833     li->matlam = lambda;
 834 }
 835
 836 static int count_triangle_constraints(t_ilist *ilist, t_blocka *at2con)
 837 {
 838     int      ncon1, ncon_tot;
 839     int      c0, a00, a01, n1, c1, a10, a11, ac1, n2, c2, a20, a21;
 840     int      ncon_triangle;
 841     gmx_bool bTriangle;
 842     t_iatom *ia1, *ia2, *iap;
 843
 844     ncon1    = ilist[F_CONSTR].nr/3;
 845     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 846
 847     ia1 = ilist[F_CONSTR].iatoms;
 848     ia2 = ilist[F_CONSTRNC].iatoms;
 849
 850     ncon_triangle = 0;
 851     for (c0 = 0; c0 < ncon_tot; c0++)
 852     {
 853         bTriangle = FALSE;
 854         iap       = constr_iatomptr(ncon1, ia1, ia2, c0);
 855         a00       = iap[1];
 856         a01       = iap[2];
 857         for (n1 = at2con->index[a01]; n1 < at2con->index[a01+1]; n1++)
 858         {
 859             c1 = at2con->a[n1];
 860             if (c1 != c0)
 861             {
 862                 iap = constr_iatomptr(ncon1, ia1, ia2, c1);
 863                 a10 = iap[1];
 864                 a11 = iap[2];
 865                 if (a10 == a01)
 866                 {
 867                     ac1 = a11;
 868                 }
 869                 else
 870                 {
 871                     ac1 = a10;
 872                 }
 873                 for (n2 = at2con->index[ac1]; n2 < at2con->index[ac1+1]; n2++)
 874                 {
 875                     c2 = at2con->a[n2];
 876                     if (c2 != c0 && c2 != c1)
 877                     {
 878                         iap = constr_iatomptr(ncon1, ia1, ia2, c2);
 879                         a20 = iap[1];
 880                         a21 = iap[2];
 881                         if (a20 == a00 || a21 == a00)
 882                         {
 883                             bTriangle = TRUE;
 884                         }
 885                     }
 886                 }
 887             }
 888         }
 889         if (bTriangle)
 890         {
 891             ncon_triangle++;
 892         }
 893     }
 894
 895     return ncon_triangle;
 896 }
 897
 898 static gmx_bool more_than_two_sequential_constraints(const t_ilist  *ilist,
 899                                                      const t_blocka *at2con)
 900 {
 901     t_iatom  *ia1, *ia2, *iap;
 902     int       ncon1, ncon_tot, c;
 903     int       a1, a2;
 904     gmx_bool  bMoreThanTwoSequentialConstraints;
 905
 906     ncon1    = ilist[F_CONSTR].nr/3;
 907     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 908
 909     ia1 = ilist[F_CONSTR].iatoms;
 910     ia2 = ilist[F_CONSTRNC].iatoms;
 911
 912     bMoreThanTwoSequentialConstraints = FALSE;
 913     for (c = 0; c < ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
 914     {
 915         iap = constr_iatomptr(ncon1, ia1, ia2, c);
 916         a1  = iap[1];
 917         a2  = iap[2];
 918         /* Check if this constraint has constraints connected at both atoms */
 919         if (at2con->index[a1+1] - at2con->index[a1] > 1 &&
 920             at2con->index[a2+1] - at2con->index[a2] > 1)
 921         {
 922             bMoreThanTwoSequentialConstraints = TRUE;
 923         }
 924     }
 925
 926     return bMoreThanTwoSequentialConstraints;
 927 }
 928
 929 static int int_comp(const void *a, const void *b)
 930 {
 931     return (*(int *)a) - (*(int *)b);
 932 }
 933
 934 gmx_lincsdata_t init_lincs(FILE *fplog, gmx_mtop_t *mtop,
 935                            int nflexcon_global, t_blocka *at2con,
 936                            gmx_bool bPLINCS, int nIter, int nProjOrder)
 937 {
 938     struct gmx_lincsdata *li;
 939     int                   mb;
 940     gmx_moltype_t        *molt;
 941
 942     if (fplog)
 943     {
 944         fprintf(fplog, "\nInitializing%s LINear Constraint Solver\n",
 945                 bPLINCS ? " Parallel" : "");
 946     }
 947
 948     snew(li, 1);
 949
 950     li->ncg      =
 951         gmx_mtop_ftype_count(mtop, F_CONSTR) +
 952         gmx_mtop_ftype_count(mtop, F_CONSTRNC);
 953     li->ncg_flex = nflexcon_global;
 954
 955     li->nIter  = nIter;
 956     li->nOrder = nProjOrder;
 957
 958     li->ncg_triangle = 0;
 959     li->bCommIter    = FALSE;
 960     for (mb = 0; mb < mtop->nmolblock; mb++)
 961     {
 962         molt              = &mtop->moltype[mtop->molblock[mb].type];
 963         li->ncg_triangle +=
 964             mtop->molblock[mb].nmol*
 965             count_triangle_constraints(molt->ilist,
 966                                        &at2con[mtop->molblock[mb].type]);
 967         if (bPLINCS && li->bCommIter == FALSE)
 968         {
 969             /* Check if we need to communicate not only before LINCS,
 970              * but also before each iteration.
 971              * The check for only two sequential constraints is only
 972              * useful for the common case of H-bond only constraints.
 973              * With more effort we could also make it useful for small
 974              * molecules with nr. sequential constraints <= nOrder-1.
 975              */
 976             li->bCommIter = (li->nOrder < 1 || more_than_two_sequential_constraints(molt->ilist, &at2con[mtop->molblock[mb].type]));
 977         }
 978     }
 979     if (debug && bPLINCS)
 980     {
 981         fprintf(debug, "PLINCS communication before each iteration: %d\n",
 982                 li->bCommIter);
 983     }
 984
 985     /* LINCS can run on any number of threads.
 986      * Currently the number is fixed for the whole simulation,
 987      * but it could be set in set_lincs().
 988      */
 989     li->nth = gmx_omp_nthreads_get(emntLINCS);
 990     if (li->nth == 1)
 991     {
 992         snew(li->th, 1);
 993     }
 994     else
 995     {
 996         /* Allocate an extra elements for "thread-overlap" constraints */
 997         snew(li->th, li->nth+1);
 998     }
 999     if (debug)
1000     {
1001         fprintf(debug, "LINCS: using %d threads\n", li->nth);
1002     }
1003
1004     if (bPLINCS || li->ncg_triangle > 0)
1005     {
1006         please_cite(fplog, "Hess2008a");
1007     }
1008     else
1009     {
1010         please_cite(fplog, "Hess97a");
1011     }
1012
1013     if (fplog)
1014     {
1015         fprintf(fplog, "The number of constraints is %d\n", li->ncg);
1016         if (bPLINCS)
1017         {
1018             fprintf(fplog, "There are inter charge-group constraints,\n"
1019                     "will communicate selected coordinates each lincs iteration\n");
1020         }
1021         if (li->ncg_triangle > 0)
1022         {
1023             fprintf(fplog,
1024                     "%d constraints are involved in constraint triangles,\n"
1025                     "will apply an additional matrix expansion of order %d for couplings\n"
1026                     "between constraints inside triangles\n",
1027                     li->ncg_triangle, li->nOrder);
1028         }
1029     }
1030
1031     return li;
1032 }
1033
1034 /* Sets up the work division over the threads */
1035 static void lincs_thread_setup(struct gmx_lincsdata *li, int natoms)
1036 {
1037     lincs_thread_t *li_m;
1038     int             th;
1039     gmx_bitmask_t  *atf;
1040     int             a;
1041
1042     if (natoms > li->atf_nalloc)
1043     {
1044         li->atf_nalloc = over_alloc_large(natoms);
1045         srenew(li->atf, li->atf_nalloc);
1046     }
1047
1048     atf = li->atf;
1049     /* Clear the atom flags */
1050     for (a = 0; a < natoms; a++)
1051     {
1052         bitmask_clear(&atf[a]);
1053     }
1054
1055     if (li->nth > BITMASK_SIZE)
1056     {
1057         gmx_fatal(FARGS, "More than %d threads is not supported for LINCS.", BITMASK_SIZE);
1058     }
1059
1060     for (th = 0; th < li->nth; th++)
1061     {
1062         lincs_thread_t *li_th;
1063         int             b;
1064
1065         li_th = &li->th[th];
1066
1067         /* The constraints are divided equally over the threads */
1068         li_th->b0 = (li->nc* th   )/li->nth;
1069         li_th->b1 = (li->nc*(th+1))/li->nth;
1070
1071         /* For each atom set a flag for constraints from each */
1072         for (b = li_th->b0; b < li_th->b1; b++)
1073         {
1074             bitmask_set_bit(&atf[li->bla[b*2]], th);
1075             bitmask_set_bit(&atf[li->bla[b*2+1]], th);
1076         }
1077     }
1078
1079 #pragma omp parallel for num_threads(li->nth) schedule(static)
1080     for (th = 0; th < li->nth; th++)
1081     {
1082         lincs_thread_t *li_th;
1083         gmx_bitmask_t   mask;
1084         int             b;
1085
1086         li_th = &li->th[th];
1087
1088         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1089         {
1090             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1091             srenew(li_th->ind, li_th->ind_nalloc);
1092             srenew(li_th->ind_r, li_th->ind_nalloc);
1093         }
1094
1095         bitmask_init_low_bits(&mask, th);
1096
1097         li_th->nind   = 0;
1098         li_th->nind_r = 0;
1099         for (b = li_th->b0; b < li_th->b1; b++)
1100         {
1101             /* We let the constraint with the lowest thread index
1102              * operate on atoms with constraints from multiple threads.
1103              */
1104             if (bitmask_is_disjoint(atf[li->bla[b*2]], mask) &&
1105                 bitmask_is_disjoint(atf[li->bla[b*2+1]], mask))
1106             {
1107                 /* Add the constraint to the local atom update index */
1108                 li_th->ind[li_th->nind++] = b;
1109             }
1110             else
1111             {
1112                 /* Add the constraint to the rest block */
1113                 li_th->ind_r[li_th->nind_r++] = b;
1114             }
1115         }
1116     }
1117
1118     /* We need to copy all constraints which have not be assigned
1119      * to a thread to a separate list which will be handled by one thread.
1120      */
1121     li_m = &li->th[li->nth];
1122
1123     li_m->nind = 0;
1124     for (th = 0; th < li->nth; th++)
1125     {
1126         lincs_thread_t *li_th;
1127         int             b;
1128
1129         li_th   = &li->th[th];
1130
1131         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1132         {
1133             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1134             srenew(li_m->ind, li_m->ind_nalloc);
1135         }
1136
1137         for (b = 0; b < li_th->nind_r; b++)
1138         {
1139             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1140         }
1141
1142         if (debug)
1143         {
1144             fprintf(debug, "LINCS thread %d: %d constraints\n",
1145                     th, li_th->nind);
1146         }
1147     }
1148
1149     if (debug)
1150     {
1151         fprintf(debug, "LINCS thread r: %d constraints\n",
1152                 li_m->nind);
1153     }
1154 }
1155
1156
1157 void set_lincs(t_idef *idef, t_mdatoms *md,
1158                gmx_bool bDynamics, t_commrec *cr,
1159                struct gmx_lincsdata *li)
1160 {
1161     int          start, natoms, nflexcon;
1162     t_blocka     at2con;
1163     t_iatom     *iatom;
1164     int          i, k, ncc_alloc, ni, con, nconnect, concon;
1165     int          type, a1, a2;
1166     real         lenA = 0, lenB;
1167
1168     li->nc  = 0;
1169     li->ncc = 0;
1170     /* Zero the thread index ranges.
1171      * Otherwise without local constraints we could return with old ranges.
1172      */
1173     for (i = 0; i < li->nth; i++)
1174     {
1175         li->th[i].b0   = 0;
1176         li->th[i].b1   = 0;
1177         li->th[i].nind = 0;
1178     }
1179     if (li->nth > 1)
1180     {
1181         li->th[li->nth].nind = 0;
1182     }
1183
1184     /* This is the local topology, so there are only F_CONSTR constraints */
1185     if (idef->il[F_CONSTR].nr == 0)
1186     {
1187         /* There are no constraints,
1188          * we do not need to fill any data structures.
1189          */
1190         return;
1191     }
1192
1193     if (debug)
1194     {
1195         fprintf(debug, "Building the LINCS connectivity\n");
1196     }
1197
1198     if (DOMAINDECOMP(cr))
1199     {
1200         if (cr->dd->constraints)
1201         {
1202             dd_get_constraint_range(cr->dd, &start, &natoms);
1203         }
1204         else
1205         {
1206             natoms = cr->dd->nat_home;
1207         }
1208         start = 0;
1209     }
1210     else
1211     {
1212         start  = 0;
1213         natoms = md->homenr;
1214     }
1215     at2con = make_at2con(start, natoms, idef->il, idef->iparams, bDynamics,
1216                          &nflexcon);
1217
1218
1219     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1220     {
1221         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1222         srenew(li->bllen0, li->nc_alloc);
1223         srenew(li->ddist, li->nc_alloc);
1224         srenew(li->bla, 2*li->nc_alloc);
1225         srenew(li->blc, li->nc_alloc);
1226         srenew(li->blc1, li->nc_alloc);
1227         srenew(li->blnr, li->nc_alloc+1);
1228         srenew(li->bllen, li->nc_alloc);
1229         srenew(li->tmpv, li->nc_alloc);
1230         srenew(li->tmp1, li->nc_alloc);
1231         srenew(li->tmp2, li->nc_alloc);
1232         srenew(li->tmp3, li->nc_alloc);
1233         srenew(li->tmp4, li->nc_alloc);
1234         srenew(li->mlambda, li->nc_alloc);
1235         if (li->ncg_triangle > 0)
1236         {
1237             /* This is allocating too much, but it is difficult to improve */
1238             srenew(li->triangle, li->nc_alloc);
1239             srenew(li->tri_bits, li->nc_alloc);
1240         }
1241     }
1242
1243     iatom = idef->il[F_CONSTR].iatoms;
1244
1245     ncc_alloc   = li->ncc_alloc;
1246     li->blnr[0] = 0;
1247
1248     ni = idef->il[F_CONSTR].nr/3;
1249
1250     con           = 0;
1251     nconnect      = 0;
1252     li->blnr[con] = nconnect;
1253     for (i = 0; i < ni; i++)
1254     {
1255         type   = iatom[3*i];
1256         a1     = iatom[3*i+1];
1257         a2     = iatom[3*i+2];
1258         lenA   = idef->iparams[type].constr.dA;
1259         lenB   = idef->iparams[type].constr.dB;
1260         /* Skip the flexible constraints when not doing dynamics */
1261         if (bDynamics || lenA != 0 || lenB != 0)
1262         {
1263             li->bllen0[con]  = lenA;
1264             li->ddist[con]   = lenB - lenA;
1265             /* Set the length to the topology A length */
1266             li->bllen[con]   = li->bllen0[con];
1267             li->bla[2*con]   = a1;
1268             li->bla[2*con+1] = a2;
1269             /* Construct the constraint connection matrix blbnb */
1270             for (k = at2con.index[a1-start]; k < at2con.index[a1-start+1]; k++)
1271             {
1272                 concon = at2con.a[k];
1273                 if (concon != i)
1274                 {
1275                     if (nconnect >= ncc_alloc)
1276                     {
1277                         ncc_alloc = over_alloc_small(nconnect+1);
1278                         srenew(li->blbnb, ncc_alloc);
1279                     }
1280                     li->blbnb[nconnect++] = concon;
1281                 }
1282             }
1283             for (k = at2con.index[a2-start]; k < at2con.index[a2-start+1]; k++)
1284             {
1285                 concon = at2con.a[k];
1286                 if (concon != i)
1287                 {
1288                     if (nconnect+1 > ncc_alloc)
1289                     {
1290                         ncc_alloc = over_alloc_small(nconnect+1);
1291                         srenew(li->blbnb, ncc_alloc);
1292                     }
1293                     li->blbnb[nconnect++] = concon;
1294                 }
1295             }
1296             li->blnr[con+1] = nconnect;
1297
1298             if (cr->dd == NULL)
1299             {
1300                 /* Order the blbnb matrix to optimize memory access */
1301                 qsort(&(li->blbnb[li->blnr[con]]), li->blnr[con+1]-li->blnr[con],
1302                       sizeof(li->blbnb[0]), int_comp);
1303             }
1304             /* Increase the constraint count */
1305             con++;
1306         }
1307     }
1308
1309     done_blocka(&at2con);
1310
1311     /* This is the real number of constraints,
1312      * without dynamics the flexible constraints are not present.
1313      */
1314     li->nc = con;
1315
1316     li->ncc = li->blnr[con];
1317     if (cr->dd == NULL)
1318     {
1319         /* Since the matrix is static, we can free some memory */
1320         ncc_alloc = li->ncc;
1321         srenew(li->blbnb, ncc_alloc);
1322     }
1323
1324     if (ncc_alloc > li->ncc_alloc)
1325     {
1326         li->ncc_alloc = ncc_alloc;
1327         srenew(li->blmf, li->ncc_alloc);
1328         srenew(li->blmf1, li->ncc_alloc);
1329         srenew(li->tmpncc, li->ncc_alloc);
1330     }
1331
1332     if (debug)
1333     {
1334         fprintf(debug, "Number of constraints is %d, couplings %d\n",
1335                 li->nc, li->ncc);
1336     }
1337
1338     if (li->nth == 1)
1339     {
1340         li->th[0].b0 = 0;
1341         li->th[0].b1 = li->nc;
1342     }
1343     else
1344     {
1345         lincs_thread_setup(li, md->nr);
1346     }
1347
1348     set_lincs_matrix(li, md->invmass, md->lambda);
1349 }
1350
1351 static void lincs_warning(FILE *fplog,
1352                           gmx_domdec_t *dd, rvec *x, rvec *xprime, t_pbc *pbc,
1353                           int ncons, int *bla, real *bllen, real wangle,
1354                           int maxwarn, int *warncount)
1355 {
1356     int  b, i, j;
1357     rvec v0, v1;
1358     real wfac, d0, d1, cosine;
1359     char buf[STRLEN];
1360
1361     wfac = cos(DEG2RAD*wangle);
1362
1363     sprintf(buf, "bonds that rotated more than %g degrees:\n"
1364             " atom 1 atom 2  angle  previous, current, constraint length\n",
1365             wangle);
1366     fprintf(stderr, "%s", buf);
1367     if (fplog)
1368     {
1369         fprintf(fplog, "%s", buf);
1370     }
1371
1372     for (b = 0; b < ncons; b++)
1373     {
1374         i = bla[2*b];
1375         j = bla[2*b+1];
1376         if (pbc)
1377         {
1378             pbc_dx_aiuc(pbc, x[i], x[j], v0);
1379             pbc_dx_aiuc(pbc, xprime[i], xprime[j], v1);
1380         }
1381         else
1382         {
1383             rvec_sub(x[i], x[j], v0);
1384             rvec_sub(xprime[i], xprime[j], v1);
1385         }
1386         d0     = norm(v0);
1387         d1     = norm(v1);
1388         cosine = iprod(v0, v1)/(d0*d1);
1389         if (cosine < wfac)
1390         {
1391             sprintf(buf, " %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1392                     ddglatnr(dd, i), ddglatnr(dd, j),
1393                     RAD2DEG*acos(cosine), d0, d1, bllen[b]);
1394             fprintf(stderr, "%s", buf);
1395             if (fplog)
1396             {
1397                 fprintf(fplog, "%s", buf);
1398             }
1399             if (!gmx_isfinite(d1))
1400             {
1401                 gmx_fatal(FARGS, "Bond length not finite.");
1402             }
1403
1404             (*warncount)++;
1405         }
1406     }
1407     if (*warncount > maxwarn)
1408     {
1409         too_many_constraint_warnings(econtLINCS, *warncount);
1410     }
1411 }
1412
1413 static void cconerr(gmx_domdec_t *dd,
1414                     int ncons, int *bla, real *bllen, rvec *x, t_pbc *pbc,
1415                     real *ncons_loc, real *ssd, real *max, int *imax)
1416 {
1417     real       len, d, ma, ssd2, r2;
1418     int       *nlocat, count, b, im;
1419     rvec       dx;
1420
1421     if (dd && dd->constraints)
1422     {
1423         nlocat = dd_constraints_nlocalatoms(dd);
1424     }
1425     else
1426     {
1427         nlocat = 0;
1428     }
1429
1430     ma    = 0;
1431     ssd2  = 0;
1432     im    = 0;
1433     count = 0;
1434     for (b = 0; b < ncons; b++)
1435     {
1436         if (pbc)
1437         {
1438             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
1439         }
1440         else
1441         {
1442             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
1443         }
1444         r2  = norm2(dx);
1445         len = r2*gmx_invsqrt(r2);
1446         d   = fabs(len/bllen[b]-1);
1447         if (d > ma && (nlocat == NULL || nlocat[b]))
1448         {
1449             ma = d;
1450             im = b;
1451         }
1452         if (nlocat == NULL)
1453         {
1454             ssd2 += d*d;
1455             count++;
1456         }
1457         else
1458         {
1459             ssd2  += nlocat[b]*d*d;
1460             count += nlocat[b];
1461         }
1462     }
1463
1464     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1465     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1466     *max       = ma;
1467     *imax      = im;
1468 }
1469
1470 gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner,
1471                          t_inputrec *ir,
1472                          gmx_int64_t step,
1473                          struct gmx_lincsdata *lincsd, t_mdatoms *md,
1474                          t_commrec *cr,
1475                          rvec *x, rvec *xprime, rvec *min_proj,
1476                          matrix box, t_pbc *pbc,
1477                          real lambda, real *dvdlambda,
1478                          real invdt, rvec *v,
1479                          gmx_bool bCalcVir, tensor vir_r_m_dr,
1480                          int econq,
1481                          t_nrnb *nrnb,
1482                          int maxwarn, int *warncount)
1483 {
1484     gmx_bool  bCalcDHDL;
1485     char      buf[STRLEN], buf2[22], buf3[STRLEN];
1486     int       i, warn, p_imax;
1487     real      ncons_loc, p_ssd, p_max = 0;
1488     rvec      dx;
1489     gmx_bool  bOK;
1490
1491     bOK = TRUE;
1492
1493     /* This boolean should be set by a flag passed to this routine.
1494      * We can also easily check if any constraint length is changed,
1495      * if not dH/dlambda=0 and we can also set the boolean to FALSE.
1496      */
1497     bCalcDHDL = (ir->efep != efepNO && dvdlambda != NULL);
1498
1499     if (lincsd->nc == 0 && cr->dd == NULL)
1500     {
1501         if (bLog || bEner)
1502         {
1503             lincsd->rmsd_data[0] = 0;
1504             if (ir->eI == eiSD2 && v == NULL)
1505             {
1506                 i = 2;
1507             }
1508             else
1509             {
1510                 i = 1;
1511             }
1512             lincsd->rmsd_data[i] = 0;
1513         }
1514
1515         return bOK;
1516     }
1517
1518     if (econq == econqCoord)
1519     {
1520         /* We can't use bCalcDHDL here, since NULL can be passed for dvdlambda
1521          * also with efep!=fepNO.
1522          */
1523         if (ir->efep != efepNO)
1524         {
1525             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1526             {
1527                 set_lincs_matrix(lincsd, md->invmass, md->lambda);
1528             }
1529
1530             for (i = 0; i < lincsd->nc; i++)
1531             {
1532                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1533             }
1534         }
1535
1536         if (lincsd->ncg_flex)
1537         {
1538             /* Set the flexible constraint lengths to the old lengths */
1539             if (pbc != NULL)
1540             {
1541                 for (i = 0; i < lincsd->nc; i++)
1542                 {
1543                     if (lincsd->bllen[i] == 0)
1544                     {
1545                         pbc_dx_aiuc(pbc, x[lincsd->bla[2*i]], x[lincsd->bla[2*i+1]], dx);
1546                         lincsd->bllen[i] = norm(dx);
1547                     }
1548                 }
1549             }
1550             else
1551             {
1552                 for (i = 0; i < lincsd->nc; i++)
1553                 {
1554                     if (lincsd->bllen[i] == 0)
1555                     {
1556                         lincsd->bllen[i] =
1557                             sqrt(distance2(x[lincsd->bla[2*i]],
1558                                            x[lincsd->bla[2*i+1]]));
1559                     }
1560                 }
1561             }
1562         }
1563
1564         if (bLog && fplog)
1565         {
1566             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1567                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1568         }
1569
1570         /* This warn var can be updated by multiple threads
1571          * at the same time. But as we only need to detect
1572          * if a warning occured or not, this is not an issue.
1573          */
1574         warn = -1;
1575
1576         /* The OpenMP parallel region of constrain_lincs for coords */
1577 #pragma omp parallel num_threads(lincsd->nth)
1578         {
1579             int th = gmx_omp_get_thread_num();
1580
1581             clear_mat(lincsd->th[th].vir_r_m_dr);
1582
1583             do_lincs(x, xprime, box, pbc, lincsd, th,
1584                      md->invmass, cr,
1585                      bCalcDHDL,
1586                      ir->LincsWarnAngle, &warn,
1587                      invdt, v, bCalcVir,
1588                      th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1589         }
1590
1591         if (bLog && fplog && lincsd->nc > 0)
1592         {
1593             fprintf(fplog, "   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1594             fprintf(fplog, "       Before LINCS          %.6f    %.6f %6d %6d\n",
1595                     sqrt(p_ssd/ncons_loc), p_max,
1596                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1597                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1598         }
1599         if (bLog || bEner)
1600         {
1601             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1602                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1603             /* Check if we are doing the second part of SD */
1604             if (ir->eI == eiSD2 && v == NULL)
1605             {
1606                 i = 2;
1607             }
1608             else
1609             {
1610                 i = 1;
1611             }
1612             lincsd->rmsd_data[0] = ncons_loc;
1613             lincsd->rmsd_data[i] = p_ssd;
1614         }
1615         else
1616         {
1617             lincsd->rmsd_data[0] = 0;
1618             lincsd->rmsd_data[1] = 0;
1619             lincsd->rmsd_data[2] = 0;
1620         }
1621         if (bLog && fplog && lincsd->nc > 0)
1622         {
1623             fprintf(fplog,
1624                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1625                     sqrt(p_ssd/ncons_loc), p_max,
1626                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1627                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1628         }
1629
1630         if (warn >= 0)
1631         {
1632             if (maxwarn >= 0)
1633             {
1634                 cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1635                         &ncons_loc, &p_ssd, &p_max, &p_imax);
1636                 if (MULTISIM(cr))
1637                 {
1638                     sprintf(buf3, " in simulation %d", cr->ms->sim);
1639                 }
1640                 else
1641                 {
1642                     buf3[0] = 0;
1643                 }
1644                 sprintf(buf, "\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1645                         "relative constraint deviation after LINCS:\n"
1646                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1647                         gmx_step_str(step, buf2), ir->init_t+step*ir->delta_t,
1648                         buf3,
1649                         sqrt(p_ssd/ncons_loc), p_max,
1650                         ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1651                         ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1652                 if (fplog)
1653                 {
1654                     fprintf(fplog, "%s", buf);
1655                 }
1656                 fprintf(stderr, "%s", buf);
1657                 lincs_warning(fplog, cr->dd, x, xprime, pbc,
1658                               lincsd->nc, lincsd->bla, lincsd->bllen,
1659                               ir->LincsWarnAngle, maxwarn, warncount);
1660             }
1661             bOK = (p_max < 0.5);
1662         }
1663
1664         if (lincsd->ncg_flex)
1665         {
1666             for (i = 0; (i < lincsd->nc); i++)
1667             {
1668                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1669                 {
1670                     lincsd->bllen[i] = 0;
1671                 }
1672             }
1673         }
1674     }
1675     else
1676     {
1677         /* The OpenMP parallel region of constrain_lincs for derivatives */
1678 #pragma omp parallel num_threads(lincsd->nth)
1679         {
1680             int th = gmx_omp_get_thread_num();
1681
1682             do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
1683                       md->invmass, econq, bCalcDHDL,
1684                       bCalcVir, th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1685         }
1686     }
1687
1688     if (bCalcDHDL)
1689     {
1690         /* Reduce the dH/dlambda contributions over the threads */
1691         real dhdlambda;
1692         int  th;
1693
1694         dhdlambda = 0;
1695         for (th = 0; th < lincsd->nth; th++)
1696         {
1697             dhdlambda += lincsd->th[th].dhdlambda;
1698         }
1699         if (econqCoord)
1700         {
1701             /* dhdlambda contains dH/dlambda*dt^2, correct for this */
1702             /* TODO This should probably use invdt, so that sd integrator scaling works properly */
1703             dhdlambda /= ir->delta_t*ir->delta_t;
1704         }
1705         *dvdlambda += dhdlambda;
1706     }
1707
1708     if (bCalcVir && lincsd->nth > 1)
1709     {
1710         for (i = 1; i < lincsd->nth; i++)
1711         {
1712             m_add(vir_r_m_dr, lincsd->th[i].vir_r_m_dr, vir_r_m_dr);
1713         }
1714     }
1715
1716     /* count assuming nit=1 */
1717     inc_nrnb(nrnb, eNR_LINCS, lincsd->nc);
1718     inc_nrnb(nrnb, eNR_LINCSMAT, (2+lincsd->nOrder)*lincsd->ncc);
1719     if (lincsd->ntriangle > 0)
1720     {
1721         inc_nrnb(nrnb, eNR_LINCSMAT, lincsd->nOrder*lincsd->ncc_triangle);
1722     }
1723     if (v)
1724     {
1725         inc_nrnb(nrnb, eNR_CONSTR_V, lincsd->nc*2);
1726     }
1727     if (bCalcVir)
1728     {
1729         inc_nrnb(nrnb, eNR_CONSTR_VIR, lincsd->nc);
1730     }
1731
1732     return bOK;
1733 }