src/gromacs/mdlib/clincs.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15  *
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 /* This file is completely threadsafe - keep it that way! */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include "main.h"
  43 #include "constr.h"
  44 #include "copyrite.h"
  45 #include "physics.h"
  46 #include "vec.h"
  47 #include "pbc.h"
  48 #include "smalloc.h"
  49 #include "mdrun.h"
  50 #include "nrnb.h"
  51 #include "domdec.h"
  52 #include "partdec.h"
  53 #include "mtop_util.h"
  54 #include "gmx_omp_nthreads.h"
  55
  56 #include "gromacs/fileio/gmxfio.h"
  57 #include "gromacs/utility/gmxomp.h"
  58
  59 typedef struct {
  60     int    b0;         /* first constraint for this thread */
  61     int    b1;         /* b1-1 is the last constraint for this thread */
  62     int    nind;       /* number of indices */
  63     int   *ind;        /* constraint index for updating atom data */
  64     int    nind_r;     /* number of indices */
  65     int   *ind_r;      /* constraint index for updating atom data */
  66     int    ind_nalloc; /* allocation size of ind and ind_r */
  67     tensor vir_r_m_dr; /* temporary variable for virial calculation */
  68 } lincs_thread_t;
  69
  70 typedef struct gmx_lincsdata {
  71     int             ncg;          /* the global number of constraints */
  72     int             ncg_flex;     /* the global number of flexible constraints */
  73     int             ncg_triangle; /* the global number of constraints in triangles */
  74     int             nIter;        /* the number of iterations */
  75     int             nOrder;       /* the order of the matrix expansion */
  76     int             nc;           /* the number of constraints */
  77     int             nc_alloc;     /* the number we allocated memory for */
  78     int             ncc;          /* the number of constraint connections */
  79     int             ncc_alloc;    /* the number we allocated memory for */
  80     real            matlam;       /* the FE lambda value used for filling blc and blmf */
  81     real           *bllen0;       /* the reference distance in topology A */
  82     real           *ddist;        /* the reference distance in top B - the r.d. in top A */
  83     int            *bla;          /* the atom pairs involved in the constraints */
  84     real           *blc;          /* 1/sqrt(invmass1 + invmass2) */
  85     real           *blc1;         /* as blc, but with all masses 1 */
  86     int            *blnr;         /* index into blbnb and blmf */
  87     int            *blbnb;        /* list of constraint connections */
  88     int             ntriangle;    /* the local number of constraints in triangles */
  89     int            *triangle;     /* the list of triangle constraints */
  90     int            *tri_bits;     /* the bits tell if the matrix element should be used */
  91     int             ncc_triangle; /* the number of constraint connections in triangles */
  92     gmx_bool        bCommIter;    /* communicate before each LINCS interation */
  93     real           *blmf;         /* matrix of mass factors for constraint connections */
  94     real           *blmf1;        /* as blmf, but with all masses 1 */
  95     real           *bllen;        /* the reference bond length */
  96     int             nth;          /* The number of threads doing LINCS */
  97     lincs_thread_t *th;           /* LINCS thread division */
  98     unsigned       *atf;          /* atom flags for thread parallelization */
  99     int             atf_nalloc;   /* allocation size of atf */
 100     /* arrays for temporary storage in the LINCS algorithm */
 101     rvec           *tmpv;
 102     real           *tmpncc;
 103     real           *tmp1;
 104     real           *tmp2;
 105     real           *tmp3;
 106     real           *tmp4;
 107     real           *mlambda; /* the Lagrange multipliers * -1 */
 108     /* storage for the constraint RMS relative deviation output */
 109     real            rmsd_data[3];
 110 } t_gmx_lincsdata;
 111
 112 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
 113 {
 114     return lincsd->rmsd_data;
 115 }
 116
 117 real lincs_rmsd(struct gmx_lincsdata *lincsd, gmx_bool bSD2)
 118 {
 119     if (lincsd->rmsd_data[0] > 0)
 120     {
 121         return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
 122     }
 123     else
 124     {
 125         return 0;
 126     }
 127 }
 128
 129 /* Do a set of nrec LINCS matrix multiplications.
 130  * This function will return with up to date thread-local
 131  * constraint data, without an OpenMP barrier.
 132  */
 133 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
 134                                 int b0, int b1,
 135                                 const real *blcc,
 136                                 real *rhs1, real *rhs2, real *sol)
 137 {
 138     int        nrec, rec, b, j, n, nr0, nr1;
 139     real       mvb, *swap;
 140     int        ntriangle, tb, bits;
 141     const int *blnr     = lincsd->blnr, *blbnb = lincsd->blbnb;
 142     const int *triangle = lincsd->triangle, *tri_bits = lincsd->tri_bits;
 143
 144     ntriangle = lincsd->ntriangle;
 145     nrec      = lincsd->nOrder;
 146
 147     for (rec = 0; rec < nrec; rec++)
 148     {
 149 #pragma omp barrier
 150         for (b = b0; b < b1; b++)
 151         {
 152             mvb = 0;
 153             for (n = blnr[b]; n < blnr[b+1]; n++)
 154             {
 155                 j   = blbnb[n];
 156                 mvb = mvb + blcc[n]*rhs1[j];
 157             }
 158             rhs2[b] = mvb;
 159             sol[b]  = sol[b] + mvb;
 160         }
 161         swap = rhs1;
 162         rhs1 = rhs2;
 163         rhs2 = swap;
 164     } /* nrec*(ncons+2*nrtot) flops */
 165
 166     if (ntriangle > 0)
 167     {
 168         /* Perform an extra nrec recursions for only the constraints
 169          * involved in rigid triangles.
 170          * In this way their accuracy should come close to those of the other
 171          * constraints, since traingles of constraints can produce eigenvalues
 172          * around 0.7, while the effective eigenvalue for bond constraints
 173          * is around 0.4 (and 0.7*0.7=0.5).
 174          */
 175         /* We need to copy the temporary array, since only the elements
 176          * for constraints involved in triangles are updated and then
 177          * the pointers are swapped. This saving copying the whole arrary.
 178          * We need barrier as other threads might still be reading from rhs2.
 179          */
 180 #pragma omp barrier
 181         for (b = b0; b < b1; b++)
 182         {
 183             rhs2[b] = rhs1[b];
 184         }
 185 #pragma omp barrier
 186 #pragma omp master
 187         {
 188             for (rec = 0; rec < nrec; rec++)
 189             {
 190                 for (tb = 0; tb < ntriangle; tb++)
 191                 {
 192                     b    = triangle[tb];
 193                     bits = tri_bits[tb];
 194                     mvb  = 0;
 195                     nr0  = blnr[b];
 196                     nr1  = blnr[b+1];
 197                     for (n = nr0; n < nr1; n++)
 198                     {
 199                         if (bits & (1<<(n-nr0)))
 200                         {
 201                             j   = blbnb[n];
 202                             mvb = mvb + blcc[n]*rhs1[j];
 203                         }
 204                     }
 205                     rhs2[b] = mvb;
 206                     sol[b]  = sol[b] + mvb;
 207                 }
 208                 swap = rhs1;
 209                 rhs1 = rhs2;
 210                 rhs2 = swap;
 211             }
 212         } /* flops count is missing here */
 213
 214         /* We need a barrier here as the calling routine will continue
 215          * to operate on the thread-local constraints without barrier.
 216          */
 217 #pragma omp barrier
 218     }
 219 }
 220
 221 static void lincs_update_atoms_noind(int ncons, const int *bla,
 222                                      real prefac,
 223                                      const real *fac, rvec *r,
 224                                      const real *invmass,
 225                                      rvec *x)
 226 {
 227     int  b, i, j;
 228     real mvb, im1, im2, tmp0, tmp1, tmp2;
 229
 230     if (invmass != NULL)
 231     {
 232         for (b = 0; b < ncons; b++)
 233         {
 234             i        = bla[2*b];
 235             j        = bla[2*b+1];
 236             mvb      = prefac*fac[b];
 237             im1      = invmass[i];
 238             im2      = invmass[j];
 239             tmp0     = r[b][0]*mvb;
 240             tmp1     = r[b][1]*mvb;
 241             tmp2     = r[b][2]*mvb;
 242             x[i][0] -= tmp0*im1;
 243             x[i][1] -= tmp1*im1;
 244             x[i][2] -= tmp2*im1;
 245             x[j][0] += tmp0*im2;
 246             x[j][1] += tmp1*im2;
 247             x[j][2] += tmp2*im2;
 248         } /* 16 ncons flops */
 249     }
 250     else
 251     {
 252         for (b = 0; b < ncons; b++)
 253         {
 254             i        = bla[2*b];
 255             j        = bla[2*b+1];
 256             mvb      = prefac*fac[b];
 257             tmp0     = r[b][0]*mvb;
 258             tmp1     = r[b][1]*mvb;
 259             tmp2     = r[b][2]*mvb;
 260             x[i][0] -= tmp0;
 261             x[i][1] -= tmp1;
 262             x[i][2] -= tmp2;
 263             x[j][0] += tmp0;
 264             x[j][1] += tmp1;
 265             x[j][2] += tmp2;
 266         }
 267     }
 268 }
 269
 270 static void lincs_update_atoms_ind(int ncons, const int *ind, const int *bla,
 271                                    real prefac,
 272                                    const real *fac, rvec *r,
 273                                    const real *invmass,
 274                                    rvec *x)
 275 {
 276     int  bi, b, i, j;
 277     real mvb, im1, im2, tmp0, tmp1, tmp2;
 278
 279     if (invmass != NULL)
 280     {
 281         for (bi = 0; bi < ncons; bi++)
 282         {
 283             b        = ind[bi];
 284             i        = bla[2*b];
 285             j        = bla[2*b+1];
 286             mvb      = prefac*fac[b];
 287             im1      = invmass[i];
 288             im2      = invmass[j];
 289             tmp0     = r[b][0]*mvb;
 290             tmp1     = r[b][1]*mvb;
 291             tmp2     = r[b][2]*mvb;
 292             x[i][0] -= tmp0*im1;
 293             x[i][1] -= tmp1*im1;
 294             x[i][2] -= tmp2*im1;
 295             x[j][0] += tmp0*im2;
 296             x[j][1] += tmp1*im2;
 297             x[j][2] += tmp2*im2;
 298         } /* 16 ncons flops */
 299     }
 300     else
 301     {
 302         for (bi = 0; bi < ncons; bi++)
 303         {
 304             b        = ind[bi];
 305             i        = bla[2*b];
 306             j        = bla[2*b+1];
 307             mvb      = prefac*fac[b];
 308             tmp0     = r[b][0]*mvb;
 309             tmp1     = r[b][1]*mvb;
 310             tmp2     = r[b][2]*mvb;
 311             x[i][0] -= tmp0;
 312             x[i][1] -= tmp1;
 313             x[i][2] -= tmp2;
 314             x[j][0] += tmp0;
 315             x[j][1] += tmp1;
 316             x[j][2] += tmp2;
 317         } /* 16 ncons flops */
 318     }
 319 }
 320
 321 static void lincs_update_atoms(struct gmx_lincsdata *li, int th,
 322                                real prefac,
 323                                const real *fac, rvec *r,
 324                                const real *invmass,
 325                                rvec *x)
 326 {
 327     if (li->nth == 1)
 328     {
 329         /* Single thread, we simply update for all constraints */
 330         lincs_update_atoms_noind(li->nc, li->bla, prefac, fac, r, invmass, x);
 331     }
 332     else
 333     {
 334         /* Update the atom vector components for our thread local
 335          * constraints that only access our local atom range.
 336          * This can be done without a barrier.
 337          */
 338         lincs_update_atoms_ind(li->th[th].nind, li->th[th].ind,
 339                                li->bla, prefac, fac, r, invmass, x);
 340
 341         if (li->th[li->nth].nind > 0)
 342         {
 343             /* Update the constraints that operate on atoms
 344              * in multiple thread atom blocks on the master thread.
 345              */
 346 #pragma omp barrier
 347 #pragma omp master
 348             {
 349                 lincs_update_atoms_ind(li->th[li->nth].nind,
 350                                        li->th[li->nth].ind,
 351                                        li->bla, prefac, fac, r, invmass, x);
 352             }
 353         }
 354     }
 355 }
 356
 357 /* LINCS projection, works on derivatives of the coordinates */
 358 static void do_lincsp(rvec *x, rvec *f, rvec *fp, t_pbc *pbc,
 359                       struct gmx_lincsdata *lincsd, int th,
 360                       real *invmass,
 361                       int econq, real *dvdlambda,
 362                       gmx_bool bCalcVir, tensor rmdf)
 363 {
 364     int      b0, b1, b, i, j, k, n;
 365     real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, wfac, lam;
 366     rvec     dx;
 367     int     *bla, *blnr, *blbnb;
 368     rvec    *r;
 369     real    *blc, *blmf, *blcc, *rhs1, *rhs2, *sol;
 370
 371     b0 = lincsd->th[th].b0;
 372     b1 = lincsd->th[th].b1;
 373
 374     bla    = lincsd->bla;
 375     r      = lincsd->tmpv;
 376     blnr   = lincsd->blnr;
 377     blbnb  = lincsd->blbnb;
 378     if (econq != econqForce)
 379     {
 380         /* Use mass-weighted parameters */
 381         blc  = lincsd->blc;
 382         blmf = lincsd->blmf;
 383     }
 384     else
 385     {
 386         /* Use non mass-weighted parameters */
 387         blc  = lincsd->blc1;
 388         blmf = lincsd->blmf1;
 389     }
 390     blcc   = lincsd->tmpncc;
 391     rhs1   = lincsd->tmp1;
 392     rhs2   = lincsd->tmp2;
 393     sol    = lincsd->tmp3;
 394
 395     /* Compute normalized i-j vectors */
 396     if (pbc)
 397     {
 398         for (b = b0; b < b1; b++)
 399         {
 400             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 401             unitv(dx, r[b]);
 402         }
 403     }
 404     else
 405     {
 406         for (b = b0; b < b1; b++)
 407         {
 408             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
 409             unitv(dx, r[b]);
 410         } /* 16 ncons flops */
 411     }
 412
 413 #pragma omp barrier
 414     for (b = b0; b < b1; b++)
 415     {
 416         tmp0 = r[b][0];
 417         tmp1 = r[b][1];
 418         tmp2 = r[b][2];
 419         i    = bla[2*b];
 420         j    = bla[2*b+1];
 421         for (n = blnr[b]; n < blnr[b+1]; n++)
 422         {
 423             k       = blbnb[n];
 424             blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 425         } /* 6 nr flops */
 426         mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
 427                       tmp1*(f[i][1] - f[j][1]) +
 428                       tmp2*(f[i][2] - f[j][2]));
 429         rhs1[b] = mvb;
 430         sol[b]  = mvb;
 431         /* 7 flops */
 432     }
 433     /* Together: 23*ncons + 6*nrtot flops */
 434
 435     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 436     /* nrec*(ncons+2*nrtot) flops */
 437
 438     if (econq == econqDeriv_FlexCon)
 439     {
 440         /* We only want to constraint the flexible constraints,
 441          * so we mask out the normal ones by setting sol to 0.
 442          */
 443         for (b = b0; b < b1; b++)
 444         {
 445             if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
 446             {
 447                 sol[b] = 0;
 448             }
 449         }
 450     }
 451
 452     /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
 453     for (b = b0; b < b1; b++)
 454     {
 455         sol[b] *= blc[b];
 456     }
 457
 458     /* When constraining forces, we should not use mass weighting,
 459      * so we pass invmass=NULL, which results in the use of 1 for all atoms.
 460      */
 461     lincs_update_atoms(lincsd, th, 1.0, sol, r,
 462                        (econq != econqForce) ? invmass : NULL, fp);
 463
 464     if (dvdlambda != NULL)
 465     {
 466 #pragma omp barrier
 467         for (b = b0; b < b1; b++)
 468         {
 469             *dvdlambda -= sol[b]*lincsd->ddist[b];
 470         }
 471         /* 10 ncons flops */
 472     }
 473
 474     if (bCalcVir)
 475     {
 476         /* Constraint virial,
 477          * determines sum r_bond x delta f,
 478          * where delta f is the constraint correction
 479          * of the quantity that is being constrained.
 480          */
 481         for (b = b0; b < b1; b++)
 482         {
 483             mvb = lincsd->bllen[b]*sol[b];
 484             for (i = 0; i < DIM; i++)
 485             {
 486                 tmp1 = mvb*r[b][i];
 487                 for (j = 0; j < DIM; j++)
 488                 {
 489                     rmdf[i][j] += tmp1*r[b][j];
 490                 }
 491             }
 492         } /* 23 ncons flops */
 493     }
 494 }
 495
 496 static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
 497                      struct gmx_lincsdata *lincsd, int th,
 498                      real *invmass,
 499                      t_commrec *cr,
 500                      gmx_bool bCalcLambda,
 501                      real wangle, int *warn,
 502                      real invdt, rvec *v,
 503                      gmx_bool bCalcVir, tensor vir_r_m_dr)
 504 {
 505     int      b0, b1, b, i, j, k, n, iter;
 506     real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, len2, dlen2, wfac;
 507     rvec     dx;
 508     int     *bla, *blnr, *blbnb;
 509     rvec    *r;
 510     real    *blc, *blmf, *bllen, *blcc, *rhs1, *rhs2, *sol, *blc_sol, *mlambda;
 511     int     *nlocat;
 512
 513     b0 = lincsd->th[th].b0;
 514     b1 = lincsd->th[th].b1;
 515
 516     bla     = lincsd->bla;
 517     r       = lincsd->tmpv;
 518     blnr    = lincsd->blnr;
 519     blbnb   = lincsd->blbnb;
 520     blc     = lincsd->blc;
 521     blmf    = lincsd->blmf;
 522     bllen   = lincsd->bllen;
 523     blcc    = lincsd->tmpncc;
 524     rhs1    = lincsd->tmp1;
 525     rhs2    = lincsd->tmp2;
 526     sol     = lincsd->tmp3;
 527     blc_sol = lincsd->tmp4;
 528     mlambda = lincsd->mlambda;
 529
 530     if (DOMAINDECOMP(cr) && cr->dd->constraints)
 531     {
 532         nlocat = dd_constraints_nlocalatoms(cr->dd);
 533     }
 534     else if (PARTDECOMP(cr))
 535     {
 536         nlocat = pd_constraints_nlocalatoms(cr->pd);
 537     }
 538     else
 539     {
 540         nlocat = NULL;
 541     }
 542
 543     if (pbc)
 544     {
 545         /* Compute normalized i-j vectors */
 546         for (b = b0; b < b1; b++)
 547         {
 548             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
 549             unitv(dx, r[b]);
 550         }
 551 #pragma omp barrier
 552         for (b = b0; b < b1; b++)
 553         {
 554             for (n = blnr[b]; n < blnr[b+1]; n++)
 555             {
 556                 blcc[n] = blmf[n]*iprod(r[b], r[blbnb[n]]);
 557             }
 558             pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 559             mvb     = blc[b]*(iprod(r[b], dx) - bllen[b]);
 560             rhs1[b] = mvb;
 561             sol[b]  = mvb;
 562         }
 563     }
 564     else
 565     {
 566         /* Compute normalized i-j vectors */
 567         for (b = b0; b < b1; b++)
 568         {
 569             i       = bla[2*b];
 570             j       = bla[2*b+1];
 571             tmp0    = x[i][0] - x[j][0];
 572             tmp1    = x[i][1] - x[j][1];
 573             tmp2    = x[i][2] - x[j][2];
 574             rlen    = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
 575             r[b][0] = rlen*tmp0;
 576             r[b][1] = rlen*tmp1;
 577             r[b][2] = rlen*tmp2;
 578         } /* 16 ncons flops */
 579
 580 #pragma omp barrier
 581         for (b = b0; b < b1; b++)
 582         {
 583             tmp0 = r[b][0];
 584             tmp1 = r[b][1];
 585             tmp2 = r[b][2];
 586             len  = bllen[b];
 587             i    = bla[2*b];
 588             j    = bla[2*b+1];
 589             for (n = blnr[b]; n < blnr[b+1]; n++)
 590             {
 591                 k       = blbnb[n];
 592                 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
 593             } /* 6 nr flops */
 594             mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
 595                           tmp1*(xp[i][1] - xp[j][1]) +
 596                           tmp2*(xp[i][2] - xp[j][2]) - len);
 597             rhs1[b] = mvb;
 598             sol[b]  = mvb;
 599             /* 10 flops */
 600         }
 601         /* Together: 26*ncons + 6*nrtot flops */
 602     }
 603
 604     lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 605     /* nrec*(ncons+2*nrtot) flops */
 606
 607     for (b = b0; b < b1; b++)
 608     {
 609         mlambda[b] = blc[b]*sol[b];
 610     }
 611
 612     /* Update the coordinates */
 613     lincs_update_atoms(lincsd, th, 1.0, mlambda, r, invmass, xp);
 614
 615     /*
 616      ********  Correction for centripetal effects  ********
 617      */
 618
 619     wfac = cos(DEG2RAD*wangle);
 620     wfac = wfac*wfac;
 621
 622     for (iter = 0; iter < lincsd->nIter; iter++)
 623     {
 624         if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints) ||
 625             PARTDECOMP(cr))
 626         {
 627 #pragma omp barrier
 628 #pragma omp master
 629             {
 630                 /* Communicate the corrected non-local coordinates */
 631                 if (DOMAINDECOMP(cr))
 632                 {
 633                     dd_move_x_constraints(cr->dd, box, xp, NULL);
 634                 }
 635                 else
 636                 {
 637                     pd_move_x_constraints(cr, xp, NULL);
 638                 }
 639             }
 640         }
 641
 642 #pragma omp barrier
 643         for (b = b0; b < b1; b++)
 644         {
 645             len = bllen[b];
 646             if (pbc)
 647             {
 648                 pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
 649             }
 650             else
 651             {
 652                 rvec_sub(xp[bla[2*b]], xp[bla[2*b+1]], dx);
 653             }
 654             len2  = len*len;
 655             dlen2 = 2*len2 - norm2(dx);
 656             if (dlen2 < wfac*len2 && (nlocat == NULL || nlocat[b]))
 657             {
 658                 *warn = b;
 659             }
 660             if (dlen2 > 0)
 661             {
 662                 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
 663             }
 664             else
 665             {
 666                 mvb = blc[b]*len;
 667             }
 668             rhs1[b] = mvb;
 669             sol[b]  = mvb;
 670         } /* 20*ncons flops */
 671
 672         lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
 673         /* nrec*(ncons+2*nrtot) flops */
 674
 675         for (b = b0; b < b1; b++)
 676         {
 677             mvb         = blc[b]*sol[b];
 678             blc_sol[b]  = mvb;
 679             mlambda[b] += mvb;
 680         }
 681
 682         /* Update the coordinates */
 683         lincs_update_atoms(lincsd, th, 1.0, blc_sol, r, invmass, xp);
 684     }
 685     /* nit*ncons*(37+9*nrec) flops */
 686
 687     if (v != NULL)
 688     {
 689         /* Update the velocities */
 690         lincs_update_atoms(lincsd, th, invdt, mlambda, r, invmass, v);
 691         /* 16 ncons flops */
 692     }
 693
 694     if (nlocat != NULL && bCalcLambda)
 695     {
 696         /* In lincs_update_atoms thread might cross-read mlambda */
 697 #pragma omp barrier
 698
 699         /* Only account for local atoms */
 700         for (b = b0; b < b1; b++)
 701         {
 702             mlambda[b] *= 0.5*nlocat[b];
 703         }
 704     }
 705
 706     if (bCalcVir)
 707     {
 708         /* Constraint virial */
 709         for (b = b0; b < b1; b++)
 710         {
 711             tmp0 = -bllen[b]*mlambda[b];
 712             for (i = 0; i < DIM; i++)
 713             {
 714                 tmp1 = tmp0*r[b][i];
 715                 for (j = 0; j < DIM; j++)
 716                 {
 717                     vir_r_m_dr[i][j] -= tmp1*r[b][j];
 718                 }
 719             }
 720         } /* 22 ncons flops */
 721     }
 722
 723     /* Total:
 724      * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
 725      * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
 726      *
 727      * (26+nrec)*ncons + (6+2*nrec)*nrtot
 728      * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
 729      * if nit=1
 730      * (63+nrec)*ncons + (6+4*nrec)*nrtot
 731      */
 732 }
 733
 734 void set_lincs_matrix(struct gmx_lincsdata *li, real *invmass, real lambda)
 735 {
 736     int        i, a1, a2, n, k, sign, center;
 737     int        end, nk, kk;
 738     const real invsqrt2 = 0.7071067811865475244;
 739
 740     for (i = 0; (i < li->nc); i++)
 741     {
 742         a1          = li->bla[2*i];
 743         a2          = li->bla[2*i+1];
 744         li->blc[i]  = gmx_invsqrt(invmass[a1] + invmass[a2]);
 745         li->blc1[i] = invsqrt2;
 746     }
 747
 748     /* Construct the coupling coefficient matrix blmf */
 749     li->ntriangle    = 0;
 750     li->ncc_triangle = 0;
 751     for (i = 0; (i < li->nc); i++)
 752     {
 753         a1 = li->bla[2*i];
 754         a2 = li->bla[2*i+1];
 755         for (n = li->blnr[i]; (n < li->blnr[i+1]); n++)
 756         {
 757             k = li->blbnb[n];
 758             if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
 759             {
 760                 sign = -1;
 761             }
 762             else
 763             {
 764                 sign = 1;
 765             }
 766             if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
 767             {
 768                 center = a1;
 769                 end    = a2;
 770             }
 771             else
 772             {
 773                 center = a2;
 774                 end    = a1;
 775             }
 776             li->blmf[n]  = sign*invmass[center]*li->blc[i]*li->blc[k];
 777             li->blmf1[n] = sign*0.5;
 778             if (li->ncg_triangle > 0)
 779             {
 780                 /* Look for constraint triangles */
 781                 for (nk = li->blnr[k]; (nk < li->blnr[k+1]); nk++)
 782                 {
 783                     kk = li->blbnb[nk];
 784                     if (kk != i && kk != k &&
 785                         (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
 786                     {
 787                         if (li->ntriangle == 0 ||
 788                             li->triangle[li->ntriangle-1] < i)
 789                         {
 790                             /* Add this constraint to the triangle list */
 791                             li->triangle[li->ntriangle] = i;
 792                             li->tri_bits[li->ntriangle] = 0;
 793                             li->ntriangle++;
 794                             if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 795                             {
 796                                 gmx_fatal(FARGS, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
 797                                           li->blnr[i+1] - li->blnr[i],
 798                                           sizeof(li->tri_bits[0])*8-1);
 799                             }
 800                         }
 801                         li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
 802                         li->ncc_triangle++;
 803                     }
 804                 }
 805             }
 806         }
 807     }
 808
 809     if (debug)
 810     {
 811         fprintf(debug, "Of the %d constraints %d participate in triangles\n",
 812                 li->nc, li->ntriangle);
 813         fprintf(debug, "There are %d couplings of which %d in triangles\n",
 814                 li->ncc, li->ncc_triangle);
 815     }
 816
 817     /* Set matlam,
 818      * so we know with which lambda value the masses have been set.
 819      */
 820     li->matlam = lambda;
 821 }
 822
 823 static int count_triangle_constraints(t_ilist *ilist, t_blocka *at2con)
 824 {
 825     int      ncon1, ncon_tot;
 826     int      c0, a00, a01, n1, c1, a10, a11, ac1, n2, c2, a20, a21;
 827     int      ncon_triangle;
 828     gmx_bool bTriangle;
 829     t_iatom *ia1, *ia2, *iap;
 830
 831     ncon1    = ilist[F_CONSTR].nr/3;
 832     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 833
 834     ia1 = ilist[F_CONSTR].iatoms;
 835     ia2 = ilist[F_CONSTRNC].iatoms;
 836
 837     ncon_triangle = 0;
 838     for (c0 = 0; c0 < ncon_tot; c0++)
 839     {
 840         bTriangle = FALSE;
 841         iap       = constr_iatomptr(ncon1, ia1, ia2, c0);
 842         a00       = iap[1];
 843         a01       = iap[2];
 844         for (n1 = at2con->index[a01]; n1 < at2con->index[a01+1]; n1++)
 845         {
 846             c1 = at2con->a[n1];
 847             if (c1 != c0)
 848             {
 849                 iap = constr_iatomptr(ncon1, ia1, ia2, c1);
 850                 a10 = iap[1];
 851                 a11 = iap[2];
 852                 if (a10 == a01)
 853                 {
 854                     ac1 = a11;
 855                 }
 856                 else
 857                 {
 858                     ac1 = a10;
 859                 }
 860                 for (n2 = at2con->index[ac1]; n2 < at2con->index[ac1+1]; n2++)
 861                 {
 862                     c2 = at2con->a[n2];
 863                     if (c2 != c0 && c2 != c1)
 864                     {
 865                         iap = constr_iatomptr(ncon1, ia1, ia2, c2);
 866                         a20 = iap[1];
 867                         a21 = iap[2];
 868                         if (a20 == a00 || a21 == a00)
 869                         {
 870                             bTriangle = TRUE;
 871                         }
 872                     }
 873                 }
 874             }
 875         }
 876         if (bTriangle)
 877         {
 878             ncon_triangle++;
 879         }
 880     }
 881
 882     return ncon_triangle;
 883 }
 884
 885 static gmx_bool more_than_two_sequential_constraints(const t_ilist  *ilist,
 886                                                      const t_blocka *at2con)
 887 {
 888     t_iatom  *ia1, *ia2, *iap;
 889     int       ncon1, ncon_tot, c;
 890     int       a1, a2;
 891     gmx_bool  bMoreThanTwoSequentialConstraints;
 892
 893     ncon1    = ilist[F_CONSTR].nr/3;
 894     ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
 895
 896     ia1 = ilist[F_CONSTR].iatoms;
 897     ia2 = ilist[F_CONSTRNC].iatoms;
 898
 899     bMoreThanTwoSequentialConstraints = FALSE;
 900     for (c = 0; c < ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
 901     {
 902         iap = constr_iatomptr(ncon1, ia1, ia2, c);
 903         a1  = iap[1];
 904         a2  = iap[2];
 905         /* Check if this constraint has constraints connected at both atoms */
 906         if (at2con->index[a1+1] - at2con->index[a1] > 1 &&
 907             at2con->index[a2+1] - at2con->index[a2] > 1)
 908         {
 909             bMoreThanTwoSequentialConstraints = TRUE;
 910         }
 911     }
 912
 913     return bMoreThanTwoSequentialConstraints;
 914 }
 915
 916 static int int_comp(const void *a, const void *b)
 917 {
 918     return (*(int *)a) - (*(int *)b);
 919 }
 920
 921 gmx_lincsdata_t init_lincs(FILE *fplog, gmx_mtop_t *mtop,
 922                            int nflexcon_global, t_blocka *at2con,
 923                            gmx_bool bPLINCS, int nIter, int nProjOrder)
 924 {
 925     struct gmx_lincsdata *li;
 926     int                   mb;
 927     gmx_moltype_t        *molt;
 928
 929     if (fplog)
 930     {
 931         fprintf(fplog, "\nInitializing%s LINear Constraint Solver\n",
 932                 bPLINCS ? " Parallel" : "");
 933     }
 934
 935     snew(li, 1);
 936
 937     li->ncg      =
 938         gmx_mtop_ftype_count(mtop, F_CONSTR) +
 939         gmx_mtop_ftype_count(mtop, F_CONSTRNC);
 940     li->ncg_flex = nflexcon_global;
 941
 942     li->nIter  = nIter;
 943     li->nOrder = nProjOrder;
 944
 945     li->ncg_triangle = 0;
 946     li->bCommIter    = FALSE;
 947     for (mb = 0; mb < mtop->nmolblock; mb++)
 948     {
 949         molt              = &mtop->moltype[mtop->molblock[mb].type];
 950         li->ncg_triangle +=
 951             mtop->molblock[mb].nmol*
 952             count_triangle_constraints(molt->ilist,
 953                                        &at2con[mtop->molblock[mb].type]);
 954         if (bPLINCS && li->bCommIter == FALSE)
 955         {
 956             /* Check if we need to communicate not only before LINCS,
 957              * but also before each iteration.
 958              * The check for only two sequential constraints is only
 959              * useful for the common case of H-bond only constraints.
 960              * With more effort we could also make it useful for small
 961              * molecules with nr. sequential constraints <= nOrder-1.
 962              */
 963             li->bCommIter = (li->nOrder < 1 || more_than_two_sequential_constraints(molt->ilist, &at2con[mtop->molblock[mb].type]));
 964         }
 965     }
 966     if (debug && bPLINCS)
 967     {
 968         fprintf(debug, "PLINCS communication before each iteration: %d\n",
 969                 li->bCommIter);
 970     }
 971
 972     /* LINCS can run on any number of threads.
 973      * Currently the number is fixed for the whole simulation,
 974      * but it could be set in set_lincs().
 975      */
 976     li->nth = gmx_omp_nthreads_get(emntLINCS);
 977     if (li->nth == 1)
 978     {
 979         snew(li->th, 1);
 980     }
 981     else
 982     {
 983         /* Allocate an extra elements for "thread-overlap" constraints */
 984         snew(li->th, li->nth+1);
 985     }
 986     if (debug)
 987     {
 988         fprintf(debug, "LINCS: using %d threads\n", li->nth);
 989     }
 990
 991     if (bPLINCS || li->ncg_triangle > 0)
 992     {
 993         please_cite(fplog, "Hess2008a");
 994     }
 995     else
 996     {
 997         please_cite(fplog, "Hess97a");
 998     }
 999
1000     if (fplog)
1001     {
1002         fprintf(fplog, "The number of constraints is %d\n", li->ncg);
1003         if (bPLINCS)
1004         {
1005             fprintf(fplog, "There are inter charge-group constraints,\n"
1006                     "will communicate selected coordinates each lincs iteration\n");
1007         }
1008         if (li->ncg_triangle > 0)
1009         {
1010             fprintf(fplog,
1011                     "%d constraints are involved in constraint triangles,\n"
1012                     "will apply an additional matrix expansion of order %d for couplings\n"
1013                     "between constraints inside triangles\n",
1014                     li->ncg_triangle, li->nOrder);
1015         }
1016     }
1017
1018     return li;
1019 }
1020
1021 /* Sets up the work division over the threads */
1022 static void lincs_thread_setup(struct gmx_lincsdata *li, int natoms)
1023 {
1024     lincs_thread_t *li_m;
1025     int             th;
1026     unsigned       *atf;
1027     int             a;
1028
1029     if (natoms > li->atf_nalloc)
1030     {
1031         li->atf_nalloc = over_alloc_large(natoms);
1032         srenew(li->atf, li->atf_nalloc);
1033     }
1034
1035     atf = li->atf;
1036     /* Clear the atom flags */
1037     for (a = 0; a < natoms; a++)
1038     {
1039         atf[a] = 0;
1040     }
1041
1042     for (th = 0; th < li->nth; th++)
1043     {
1044         lincs_thread_t *li_th;
1045         int             b;
1046
1047         li_th = &li->th[th];
1048
1049         /* The constraints are divided equally over the threads */
1050         li_th->b0 = (li->nc* th   )/li->nth;
1051         li_th->b1 = (li->nc*(th+1))/li->nth;
1052
1053         if (th < sizeof(*atf)*8)
1054         {
1055             /* For each atom set a flag for constraints from each */
1056             for (b = li_th->b0; b < li_th->b1; b++)
1057             {
1058                 atf[li->bla[b*2]  ] |= (1U<<th);
1059                 atf[li->bla[b*2+1]] |= (1U<<th);
1060             }
1061         }
1062     }
1063
1064 #pragma omp parallel for num_threads(li->nth) schedule(static)
1065     for (th = 0; th < li->nth; th++)
1066     {
1067         lincs_thread_t *li_th;
1068         unsigned        mask;
1069         int             b;
1070
1071         li_th = &li->th[th];
1072
1073         if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1074         {
1075             li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1076             srenew(li_th->ind, li_th->ind_nalloc);
1077             srenew(li_th->ind_r, li_th->ind_nalloc);
1078         }
1079
1080         if (th < sizeof(*atf)*8)
1081         {
1082             mask = (1U<<th) - 1U;
1083
1084             li_th->nind   = 0;
1085             li_th->nind_r = 0;
1086             for (b = li_th->b0; b < li_th->b1; b++)
1087             {
1088                 /* We let the constraint with the lowest thread index
1089                  * operate on atoms with constraints from multiple threads.
1090                  */
1091                 if (((atf[li->bla[b*2]]   & mask) == 0) &&
1092                     ((atf[li->bla[b*2+1]] & mask) == 0))
1093                 {
1094                     /* Add the constraint to the local atom update index */
1095                     li_th->ind[li_th->nind++] = b;
1096                 }
1097                 else
1098                 {
1099                     /* Add the constraint to the rest block */
1100                     li_th->ind_r[li_th->nind_r++] = b;
1101                 }
1102             }
1103         }
1104         else
1105         {
1106             /* We are out of bits, assign all constraints to rest */
1107             for (b = li_th->b0; b < li_th->b1; b++)
1108             {
1109                 li_th->ind_r[li_th->nind_r++] = b;
1110             }
1111         }
1112     }
1113
1114     /* We need to copy all constraints which have not be assigned
1115      * to a thread to a separate list which will be handled by one thread.
1116      */
1117     li_m = &li->th[li->nth];
1118
1119     li_m->nind = 0;
1120     for (th = 0; th < li->nth; th++)
1121     {
1122         lincs_thread_t *li_th;
1123         int             b;
1124
1125         li_th   = &li->th[th];
1126
1127         if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1128         {
1129             li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1130             srenew(li_m->ind, li_m->ind_nalloc);
1131         }
1132
1133         for (b = 0; b < li_th->nind_r; b++)
1134         {
1135             li_m->ind[li_m->nind++] = li_th->ind_r[b];
1136         }
1137
1138         if (debug)
1139         {
1140             fprintf(debug, "LINCS thread %d: %d constraints\n",
1141                     th, li_th->nind);
1142         }
1143     }
1144
1145     if (debug)
1146     {
1147         fprintf(debug, "LINCS thread r: %d constraints\n",
1148                 li_m->nind);
1149     }
1150 }
1151
1152
1153 void set_lincs(t_idef *idef, t_mdatoms *md,
1154                gmx_bool bDynamics, t_commrec *cr,
1155                struct gmx_lincsdata *li)
1156 {
1157     int          start, natoms, nflexcon;
1158     t_blocka     at2con;
1159     t_iatom     *iatom;
1160     int          i, k, ncc_alloc, ni, con, nconnect, concon;
1161     int          type, a1, a2;
1162     real         lenA = 0, lenB;
1163     gmx_bool     bLocal;
1164
1165     li->nc  = 0;
1166     li->ncc = 0;
1167     /* Zero the thread index ranges.
1168      * Otherwise without local constraints we could return with old ranges.
1169      */
1170     for (i = 0; i < li->nth; i++)
1171     {
1172         li->th[i].b0   = 0;
1173         li->th[i].b1   = 0;
1174         li->th[i].nind = 0;
1175     }
1176     if (li->nth > 1)
1177     {
1178         li->th[li->nth].nind = 0;
1179     }
1180
1181     /* This is the local topology, so there are only F_CONSTR constraints */
1182     if (idef->il[F_CONSTR].nr == 0)
1183     {
1184         /* There are no constraints,
1185          * we do not need to fill any data structures.
1186          */
1187         return;
1188     }
1189
1190     if (debug)
1191     {
1192         fprintf(debug, "Building the LINCS connectivity\n");
1193     }
1194
1195     if (DOMAINDECOMP(cr))
1196     {
1197         if (cr->dd->constraints)
1198         {
1199             dd_get_constraint_range(cr->dd, &start, &natoms);
1200         }
1201         else
1202         {
1203             natoms = cr->dd->nat_home;
1204         }
1205         start = 0;
1206     }
1207     else if (PARTDECOMP(cr))
1208     {
1209         pd_get_constraint_range(cr->pd, &start, &natoms);
1210     }
1211     else
1212     {
1213         start  = md->start;
1214         natoms = md->homenr;
1215     }
1216     at2con = make_at2con(start, natoms, idef->il, idef->iparams, bDynamics,
1217                          &nflexcon);
1218
1219
1220     if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1221     {
1222         li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1223         srenew(li->bllen0, li->nc_alloc);
1224         srenew(li->ddist, li->nc_alloc);
1225         srenew(li->bla, 2*li->nc_alloc);
1226         srenew(li->blc, li->nc_alloc);
1227         srenew(li->blc1, li->nc_alloc);
1228         srenew(li->blnr, li->nc_alloc+1);
1229         srenew(li->bllen, li->nc_alloc);
1230         srenew(li->tmpv, li->nc_alloc);
1231         srenew(li->tmp1, li->nc_alloc);
1232         srenew(li->tmp2, li->nc_alloc);
1233         srenew(li->tmp3, li->nc_alloc);
1234         srenew(li->tmp4, li->nc_alloc);
1235         srenew(li->mlambda, li->nc_alloc);
1236         if (li->ncg_triangle > 0)
1237         {
1238             /* This is allocating too much, but it is difficult to improve */
1239             srenew(li->triangle, li->nc_alloc);
1240             srenew(li->tri_bits, li->nc_alloc);
1241         }
1242     }
1243
1244     iatom = idef->il[F_CONSTR].iatoms;
1245
1246     ncc_alloc   = li->ncc_alloc;
1247     li->blnr[0] = 0;
1248
1249     ni = idef->il[F_CONSTR].nr/3;
1250
1251     con           = 0;
1252     nconnect      = 0;
1253     li->blnr[con] = nconnect;
1254     for (i = 0; i < ni; i++)
1255     {
1256         bLocal = TRUE;
1257         type   = iatom[3*i];
1258         a1     = iatom[3*i+1];
1259         a2     = iatom[3*i+2];
1260         lenA   = idef->iparams[type].constr.dA;
1261         lenB   = idef->iparams[type].constr.dB;
1262         /* Skip the flexible constraints when not doing dynamics */
1263         if (bDynamics || lenA != 0 || lenB != 0)
1264         {
1265             li->bllen0[con]  = lenA;
1266             li->ddist[con]   = lenB - lenA;
1267             /* Set the length to the topology A length */
1268             li->bllen[con]   = li->bllen0[con];
1269             li->bla[2*con]   = a1;
1270             li->bla[2*con+1] = a2;
1271             /* Construct the constraint connection matrix blbnb */
1272             for (k = at2con.index[a1-start]; k < at2con.index[a1-start+1]; k++)
1273             {
1274                 concon = at2con.a[k];
1275                 if (concon != i)
1276                 {
1277                     if (nconnect >= ncc_alloc)
1278                     {
1279                         ncc_alloc = over_alloc_small(nconnect+1);
1280                         srenew(li->blbnb, ncc_alloc);
1281                     }
1282                     li->blbnb[nconnect++] = concon;
1283                 }
1284             }
1285             for (k = at2con.index[a2-start]; k < at2con.index[a2-start+1]; k++)
1286             {
1287                 concon = at2con.a[k];
1288                 if (concon != i)
1289                 {
1290                     if (nconnect+1 > ncc_alloc)
1291                     {
1292                         ncc_alloc = over_alloc_small(nconnect+1);
1293                         srenew(li->blbnb, ncc_alloc);
1294                     }
1295                     li->blbnb[nconnect++] = concon;
1296                 }
1297             }
1298             li->blnr[con+1] = nconnect;
1299
1300             if (cr->dd == NULL)
1301             {
1302                 /* Order the blbnb matrix to optimize memory access */
1303                 qsort(&(li->blbnb[li->blnr[con]]), li->blnr[con+1]-li->blnr[con],
1304                       sizeof(li->blbnb[0]), int_comp);
1305             }
1306             /* Increase the constraint count */
1307             con++;
1308         }
1309     }
1310
1311     done_blocka(&at2con);
1312
1313     /* This is the real number of constraints,
1314      * without dynamics the flexible constraints are not present.
1315      */
1316     li->nc = con;
1317
1318     li->ncc = li->blnr[con];
1319     if (cr->dd == NULL)
1320     {
1321         /* Since the matrix is static, we can free some memory */
1322         ncc_alloc = li->ncc;
1323         srenew(li->blbnb, ncc_alloc);
1324     }
1325
1326     if (ncc_alloc > li->ncc_alloc)
1327     {
1328         li->ncc_alloc = ncc_alloc;
1329         srenew(li->blmf, li->ncc_alloc);
1330         srenew(li->blmf1, li->ncc_alloc);
1331         srenew(li->tmpncc, li->ncc_alloc);
1332     }
1333
1334     if (debug)
1335     {
1336         fprintf(debug, "Number of constraints is %d, couplings %d\n",
1337                 li->nc, li->ncc);
1338     }
1339
1340     if (li->nth == 1)
1341     {
1342         li->th[0].b0 = 0;
1343         li->th[0].b1 = li->nc;
1344     }
1345     else
1346     {
1347         lincs_thread_setup(li, md->nr);
1348     }
1349
1350     set_lincs_matrix(li, md->invmass, md->lambda);
1351 }
1352
1353 static void lincs_warning(FILE *fplog,
1354                           gmx_domdec_t *dd, rvec *x, rvec *xprime, t_pbc *pbc,
1355                           int ncons, int *bla, real *bllen, real wangle,
1356                           int maxwarn, int *warncount)
1357 {
1358     int  b, i, j;
1359     rvec v0, v1;
1360     real wfac, d0, d1, cosine;
1361     char buf[STRLEN];
1362
1363     wfac = cos(DEG2RAD*wangle);
1364
1365     sprintf(buf, "bonds that rotated more than %g degrees:\n"
1366             " atom 1 atom 2  angle  previous, current, constraint length\n",
1367             wangle);
1368     fprintf(stderr, "%s", buf);
1369     if (fplog)
1370     {
1371         fprintf(fplog, "%s", buf);
1372     }
1373
1374     for (b = 0; b < ncons; b++)
1375     {
1376         i = bla[2*b];
1377         j = bla[2*b+1];
1378         if (pbc)
1379         {
1380             pbc_dx_aiuc(pbc, x[i], x[j], v0);
1381             pbc_dx_aiuc(pbc, xprime[i], xprime[j], v1);
1382         }
1383         else
1384         {
1385             rvec_sub(x[i], x[j], v0);
1386             rvec_sub(xprime[i], xprime[j], v1);
1387         }
1388         d0     = norm(v0);
1389         d1     = norm(v1);
1390         cosine = iprod(v0, v1)/(d0*d1);
1391         if (cosine < wfac)
1392         {
1393             sprintf(buf, " %6d %6d  %5.1f  %8.4f %8.4f    %8.4f\n",
1394                     ddglatnr(dd, i), ddglatnr(dd, j),
1395                     RAD2DEG*acos(cosine), d0, d1, bllen[b]);
1396             fprintf(stderr, "%s", buf);
1397             if (fplog)
1398             {
1399                 fprintf(fplog, "%s", buf);
1400             }
1401             if (!gmx_isfinite(d1))
1402             {
1403                 gmx_fatal(FARGS, "Bond length not finite.");
1404             }
1405
1406             (*warncount)++;
1407         }
1408     }
1409     if (*warncount > maxwarn)
1410     {
1411         too_many_constraint_warnings(econtLINCS, *warncount);
1412     }
1413 }
1414
1415 static void cconerr(gmx_domdec_t *dd,
1416                     int ncons, int *bla, real *bllen, rvec *x, t_pbc *pbc,
1417                     real *ncons_loc, real *ssd, real *max, int *imax)
1418 {
1419     real       len, d, ma, ssd2, r2;
1420     int       *nlocat, count, b, im;
1421     rvec       dx;
1422
1423     if (dd && dd->constraints)
1424     {
1425         nlocat = dd_constraints_nlocalatoms(dd);
1426     }
1427     else
1428     {
1429         nlocat = 0;
1430     }
1431
1432     ma    = 0;
1433     ssd2  = 0;
1434     im    = 0;
1435     count = 0;
1436     for (b = 0; b < ncons; b++)
1437     {
1438         if (pbc)
1439         {
1440             pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
1441         }
1442         else
1443         {
1444             rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
1445         }
1446         r2  = norm2(dx);
1447         len = r2*gmx_invsqrt(r2);
1448         d   = fabs(len/bllen[b]-1);
1449         if (d > ma && (nlocat == NULL || nlocat[b]))
1450         {
1451             ma = d;
1452             im = b;
1453         }
1454         if (nlocat == NULL)
1455         {
1456             ssd2 += d*d;
1457             count++;
1458         }
1459         else
1460         {
1461             ssd2  += nlocat[b]*d*d;
1462             count += nlocat[b];
1463         }
1464     }
1465
1466     *ncons_loc = (nlocat ? 0.5 : 1)*count;
1467     *ssd       = (nlocat ? 0.5 : 1)*ssd2;
1468     *max       = ma;
1469     *imax      = im;
1470 }
1471
1472 gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner,
1473                          t_inputrec *ir,
1474                          gmx_large_int_t step,
1475                          struct gmx_lincsdata *lincsd, t_mdatoms *md,
1476                          t_commrec *cr,
1477                          rvec *x, rvec *xprime, rvec *min_proj,
1478                          matrix box, t_pbc *pbc,
1479                          real lambda, real *dvdlambda,
1480                          real invdt, rvec *v,
1481                          gmx_bool bCalcVir, tensor vir_r_m_dr,
1482                          int econq,
1483                          t_nrnb *nrnb,
1484                          int maxwarn, int *warncount)
1485 {
1486     char      buf[STRLEN], buf2[22], buf3[STRLEN];
1487     int       i, warn, p_imax, error;
1488     real      ncons_loc, p_ssd, p_max = 0;
1489     rvec      dx;
1490     gmx_bool  bOK;
1491
1492     bOK = TRUE;
1493
1494     if (lincsd->nc == 0 && cr->dd == NULL)
1495     {
1496         if (bLog || bEner)
1497         {
1498             lincsd->rmsd_data[0] = 0;
1499             if (ir->eI == eiSD2 && v == NULL)
1500             {
1501                 i = 2;
1502             }
1503             else
1504             {
1505                 i = 1;
1506             }
1507             lincsd->rmsd_data[i] = 0;
1508         }
1509
1510         return bOK;
1511     }
1512
1513     if (econq == econqCoord)
1514     {
1515         if (ir->efep != efepNO)
1516         {
1517             if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1518             {
1519                 set_lincs_matrix(lincsd, md->invmass, md->lambda);
1520             }
1521
1522             for (i = 0; i < lincsd->nc; i++)
1523             {
1524                 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1525             }
1526         }
1527
1528         if (lincsd->ncg_flex)
1529         {
1530             /* Set the flexible constraint lengths to the old lengths */
1531             if (pbc != NULL)
1532             {
1533                 for (i = 0; i < lincsd->nc; i++)
1534                 {
1535                     if (lincsd->bllen[i] == 0)
1536                     {
1537                         pbc_dx_aiuc(pbc, x[lincsd->bla[2*i]], x[lincsd->bla[2*i+1]], dx);
1538                         lincsd->bllen[i] = norm(dx);
1539                     }
1540                 }
1541             }
1542             else
1543             {
1544                 for (i = 0; i < lincsd->nc; i++)
1545                 {
1546                     if (lincsd->bllen[i] == 0)
1547                     {
1548                         lincsd->bllen[i] =
1549                             sqrt(distance2(x[lincsd->bla[2*i]],
1550                                            x[lincsd->bla[2*i+1]]));
1551                     }
1552                 }
1553             }
1554         }
1555
1556         if (bLog && fplog)
1557         {
1558             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1559                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1560         }
1561
1562         /* This warn var can be updated by multiple threads
1563          * at the same time. But as we only need to detect
1564          * if a warning occured or not, this is not an issue.
1565          */
1566         warn = -1;
1567
1568         /* The OpenMP parallel region of constrain_lincs for coords */
1569 #pragma omp parallel num_threads(lincsd->nth)
1570         {
1571             int th = gmx_omp_get_thread_num();
1572
1573             clear_mat(lincsd->th[th].vir_r_m_dr);
1574
1575             do_lincs(x, xprime, box, pbc, lincsd, th,
1576                      md->invmass, cr,
1577                      bCalcVir || (ir->efep != efepNO),
1578                      ir->LincsWarnAngle, &warn,
1579                      invdt, v, bCalcVir,
1580                      th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1581         }
1582
1583         if (ir->efep != efepNO)
1584         {
1585             real dt_2, dvdl = 0;
1586
1587             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
1588             for (i = 0; (i < lincsd->nc); i++)
1589             {
1590                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
1591             }
1592             *dvdlambda += dvdl;
1593         }
1594
1595         if (bLog && fplog && lincsd->nc > 0)
1596         {
1597             fprintf(fplog, "   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
1598             fprintf(fplog, "       Before LINCS          %.6f    %.6f %6d %6d\n",
1599                     sqrt(p_ssd/ncons_loc), p_max,
1600                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1601                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1602         }
1603         if (bLog || bEner)
1604         {
1605             cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1606                     &ncons_loc, &p_ssd, &p_max, &p_imax);
1607             /* Check if we are doing the second part of SD */
1608             if (ir->eI == eiSD2 && v == NULL)
1609             {
1610                 i = 2;
1611             }
1612             else
1613             {
1614                 i = 1;
1615             }
1616             lincsd->rmsd_data[0] = ncons_loc;
1617             lincsd->rmsd_data[i] = p_ssd;
1618         }
1619         else
1620         {
1621             lincsd->rmsd_data[0] = 0;
1622             lincsd->rmsd_data[1] = 0;
1623             lincsd->rmsd_data[2] = 0;
1624         }
1625         if (bLog && fplog && lincsd->nc > 0)
1626         {
1627             fprintf(fplog,
1628                     "        After LINCS          %.6f    %.6f %6d %6d\n\n",
1629                     sqrt(p_ssd/ncons_loc), p_max,
1630                     ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1631                     ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1632         }
1633
1634         if (warn >= 0)
1635         {
1636             if (maxwarn >= 0)
1637             {
1638                 cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1639                         &ncons_loc, &p_ssd, &p_max, &p_imax);
1640                 if (MULTISIM(cr))
1641                 {
1642                     sprintf(buf3, " in simulation %d", cr->ms->sim);
1643                 }
1644                 else
1645                 {
1646                     buf3[0] = 0;
1647                 }
1648                 sprintf(buf, "\nStep %s, time %g (ps)  LINCS WARNING%s\n"
1649                         "relative constraint deviation after LINCS:\n"
1650                         "rms %.6f, max %.6f (between atoms %d and %d)\n",
1651                         gmx_step_str(step, buf2), ir->init_t+step*ir->delta_t,
1652                         buf3,
1653                         sqrt(p_ssd/ncons_loc), p_max,
1654                         ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1655                         ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1656                 if (fplog)
1657                 {
1658                     fprintf(fplog, "%s", buf);
1659                 }
1660                 fprintf(stderr, "%s", buf);
1661                 lincs_warning(fplog, cr->dd, x, xprime, pbc,
1662                               lincsd->nc, lincsd->bla, lincsd->bllen,
1663                               ir->LincsWarnAngle, maxwarn, warncount);
1664             }
1665             bOK = (p_max < 0.5);
1666         }
1667
1668         if (lincsd->ncg_flex)
1669         {
1670             for (i = 0; (i < lincsd->nc); i++)
1671             {
1672                 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1673                 {
1674                     lincsd->bllen[i] = 0;
1675                 }
1676             }
1677         }
1678     }
1679     else
1680     {
1681         /* The OpenMP parallel region of constrain_lincs for derivatives */
1682 #pragma omp parallel num_threads(lincsd->nth)
1683         {
1684             int th = gmx_omp_get_thread_num();
1685
1686             do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
1687                       md->invmass, econq, ir->efep != efepNO ? dvdlambda : NULL,
1688                       bCalcVir, th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1689         }
1690     }
1691
1692     if (bCalcVir && lincsd->nth > 1)
1693     {
1694         for (i = 1; i < lincsd->nth; i++)
1695         {
1696             m_add(vir_r_m_dr, lincsd->th[i].vir_r_m_dr, vir_r_m_dr);
1697         }
1698     }
1699
1700     /* count assuming nit=1 */
1701     inc_nrnb(nrnb, eNR_LINCS, lincsd->nc);
1702     inc_nrnb(nrnb, eNR_LINCSMAT, (2+lincsd->nOrder)*lincsd->ncc);
1703     if (lincsd->ntriangle > 0)
1704     {
1705         inc_nrnb(nrnb, eNR_LINCSMAT, lincsd->nOrder*lincsd->ncc_triangle);
1706     }
1707     if (v)
1708     {
1709         inc_nrnb(nrnb, eNR_CONSTR_V, lincsd->nc*2);
1710     }
1711     if (bCalcVir)
1712     {
1713         inc_nrnb(nrnb, eNR_CONSTR_VIR, lincsd->nc);
1714     }
1715
1716     return bOK;
1717 }