2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
38 /* This file is completely threadsafe - keep it that way! */
55 #include "mtop_util.h"
57 #include "gmx_omp_nthreads.h"
61 int b0; /* first constraint for this thread */
62 int b1; /* b1-1 is the last constraint for this thread */
63 int nind; /* number of indices */
64 int *ind; /* constraint index for updating atom data */
65 int nind_r; /* number of indices */
66 int *ind_r; /* constraint index for updating atom data */
67 int ind_nalloc; /* allocation size of ind and ind_r */
68 tensor vir_r_m_dr;/* temporary variable for virial calculation */
71 typedef struct gmx_lincsdata {
72 int ncg; /* the global number of constraints */
73 int ncg_flex; /* the global number of flexible constraints */
74 int ncg_triangle;/* the global number of constraints in triangles */
75 int nIter; /* the number of iterations */
76 int nOrder; /* the order of the matrix expansion */
77 int nc; /* the number of constraints */
78 int nc_alloc; /* the number we allocated memory for */
79 int ncc; /* the number of constraint connections */
80 int ncc_alloc; /* the number we allocated memory for */
81 real matlam; /* the FE lambda value used for filling blc and blmf */
82 real *bllen0; /* the reference distance in topology A */
83 real *ddist; /* the reference distance in top B - the r.d. in top A */
84 int *bla; /* the atom pairs involved in the constraints */
85 real *blc; /* 1/sqrt(invmass1 + invmass2) */
86 real *blc1; /* as blc, but with all masses 1 */
87 int *blnr; /* index into blbnb and blmf */
88 int *blbnb; /* list of constraint connections */
89 int ntriangle; /* the local number of constraints in triangles */
90 int *triangle; /* the list of triangle constraints */
91 int *tri_bits; /* the bits tell if the matrix element should be used */
92 int ncc_triangle;/* the number of constraint connections in triangles */
93 gmx_bool bCommIter; /* communicate before each LINCS iteration */
94 real *blmf; /* matrix of mass factors for constraint connections */
95 real *blmf1; /* as blmf, but with all masses 1 */
96 real *bllen; /* the reference bond length */
97 int nth; /* The number of threads doing LINCS */
98 lincs_thread_t *th; /* LINCS thread division */
99 unsigned *atf; /* atom flags for thread parallelization */
100 int atf_nalloc; /* allocation size of atf */
101 /* arrays for temporary storage in the LINCS algorithm */
108 real *mlambda; /* the Lagrange multipliers * -1 */
109 /* storage for the constraint RMS relative deviation output */
/* Returns a pointer to the internal RMSD accumulator array of lincsd
 * (rmsd_data[0] holds the constraint count, [1]/[2] sums of squared
 * relative deviations -- filled in constrain_lincs). The pointer refers
 * to storage owned by lincsd; the caller must not free it. */
113 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
115 return lincsd->rmsd_data;
/* Returns the RMS relative constraint deviation: sqrt(ssd/n), where
 * n = rmsd_data[0] and ssd = rmsd_data[2] for the second SD step (bSD2)
 * or rmsd_data[1] otherwise. The branch taken when no data has been
 * accumulated (rmsd_data[0] <= 0) is not visible in this view. */
118 real lincs_rmsd(struct gmx_lincsdata *lincsd,gmx_bool bSD2)
120 if (lincsd->rmsd_data[0] > 0)
122 return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
130 /* Do a set of nrec LINCS matrix multiplications.
131 * This function will return with up to date thread-local
132 * constraint data, without an OpenMP barrier.
134 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
137 real *rhs1,real *rhs2,real *sol)
139 int nrec,rec,b,j,n,nr0,nr1;
141 int ntriangle,tb,bits;
142 const int *blnr=lincsd->blnr,*blbnb=lincsd->blbnb;
143 const int *triangle=lincsd->triangle,*tri_bits=lincsd->tri_bits;
145 ntriangle = lincsd->ntriangle;
146 nrec = lincsd->nOrder;
148 for(rec=0; rec<nrec; rec++)
154 for(n=blnr[b]; n<blnr[b+1]; n++)
157 mvb = mvb + blcc[n]*rhs1[j];
160 sol[b] = sol[b] + mvb;
165 } /* nrec*(ncons+2*nrtot) flops */
169 /* Perform an extra nrec recursions for only the constraints
170 * involved in rigid triangles.
171 * In this way their accuracy should come close to those of the other
172 * constraints, since triangles of constraints can produce eigenvalues
173 * around 0.7, while the effective eigenvalue for bond constraints
174 * is around 0.4 (and 0.7*0.7=0.5).
176 /* We need to copy the temporary array, since only the elements
177 * for constraints involved in triangles are updated and then
178 * the pointers are swapped. This saves copying the whole array.
179 * We need barrier as other threads might still be reading from rhs2.
189 for(rec=0; rec<nrec; rec++)
191 for(tb=0; tb<ntriangle; tb++)
198 for(n=nr0; n<nr1; n++)
200 if (bits & (1<<(n-nr0)))
203 mvb = mvb + blcc[n]*rhs1[j];
207 sol[b] = sol[b] + mvb;
213 } /* flops count is missing here */
215 /* We need a barrier here as the calling routine will continue
216 * to operate on the thread-local constraints without barrier.
222 static void lincs_update_atoms_noind(int ncons,const int *bla,
224 const real *fac,rvec *r,
229 real mvb,im1,im2,tmp0,tmp1,tmp2;
233 for(b=0; b<ncons; b++)
249 } /* 16 ncons flops */
253 for(b=0; b<ncons; b++)
271 static void lincs_update_atoms_ind(int ncons,const int *ind,const int *bla,
273 const real *fac,rvec *r,
278 real mvb,im1,im2,tmp0,tmp1,tmp2;
282 for(bi=0; bi<ncons; bi++)
299 } /* 16 ncons flops */
303 for(bi=0; bi<ncons; bi++)
318 } /* 16 ncons flops */
322 static void lincs_update_atoms(struct gmx_lincsdata *li,int th,
324 const real *fac,rvec *r,
330 /* Single thread, we simply update for all constraints */
331 lincs_update_atoms_noind(li->nc,li->bla,prefac,fac,r,invmass,x);
335 /* Update the atom vector components for our thread local
336 * constraints that only access our local atom range.
337 * This can be done without a barrier.
339 lincs_update_atoms_ind(li->th[th].nind,li->th[th].ind,
340 li->bla,prefac,fac,r,invmass,x);
342 if (li->th[li->nth].nind > 0)
344 /* Update the constraints that operate on atoms
345 * in multiple thread atom blocks on the master thread.
350 lincs_update_atoms_ind(li->th[li->nth].nind,
352 li->bla,prefac,fac,r,invmass,x);
358 /* LINCS projection, works on derivatives of the coordinates */
359 static void do_lincsp(rvec *x,rvec *f,rvec *fp,t_pbc *pbc,
360 struct gmx_lincsdata *lincsd,int th,
362 int econq,real *dvdlambda,
363 gmx_bool bCalcVir,tensor rmdf)
366 real tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,wfac,lam;
368 int *bla,*blnr,*blbnb;
370 real *blc,*blmf,*blcc,*rhs1,*rhs2,*sol;
372 b0 = lincsd->th[th].b0;
373 b1 = lincsd->th[th].b1;
378 blbnb = lincsd->blbnb;
379 if (econq != econqForce)
381 /* Use mass-weighted parameters */
387 /* Use non mass-weighted parameters */
389 blmf = lincsd->blmf1;
391 blcc = lincsd->tmpncc;
396 /* Compute normalized i-j vectors */
401 pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
409 rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
411 } /* 16 ncons flops */
422 for(n=blnr[b]; n<blnr[b+1]; n++)
425 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
427 mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
428 tmp1*(f[i][1] - f[j][1]) +
429 tmp2*(f[i][2] - f[j][2]));
434 /* Together: 23*ncons + 6*nrtot flops */
436 lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
437 /* nrec*(ncons+2*nrtot) flops */
439 if (econq == econqDeriv_FlexCon)
441 /* We only want to constrain the flexible constraints,
442 * so we mask out the normal ones by setting sol to 0.
446 if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
453 /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
459 /* When constraining forces, we should not use mass weighting,
460 * so we pass invmass=NULL, which results in the use of 1 for all atoms.
462 lincs_update_atoms(lincsd,th,1.0,sol,r,
463 (econq != econqForce) ? invmass : NULL,fp);
465 if (dvdlambda != NULL)
470 *dvdlambda -= sol[b]*lincsd->ddist[b];
477 /* Constraint virial,
478 * determines sum r_bond x delta f,
479 * where delta f is the constraint correction
480 * of the quantity that is being constrained.
484 mvb = lincsd->bllen[b]*sol[b];
490 rmdf[i][j] += tmp1*r[b][j];
493 } /* 23 ncons flops */
497 static void do_lincs(rvec *x,rvec *xp,matrix box,t_pbc *pbc,
498 struct gmx_lincsdata *lincsd,int th,
501 gmx_bool bCalcLambda,
502 real wangle,int *warn,
504 gmx_bool bCalcVir,tensor vir_r_m_dr)
506 int b0,b1,b,i,j,k,n,iter;
507 real tmp0,tmp1,tmp2,im1,im2,mvb,rlen,len,len2,dlen2,wfac;
509 int *bla,*blnr,*blbnb;
511 real *blc,*blmf,*bllen,*blcc,*rhs1,*rhs2,*sol,*blc_sol,*mlambda;
514 b0 = lincsd->th[th].b0;
515 b1 = lincsd->th[th].b1;
520 blbnb = lincsd->blbnb;
523 bllen = lincsd->bllen;
524 blcc = lincsd->tmpncc;
528 blc_sol= lincsd->tmp4;
529 mlambda= lincsd->mlambda;
531 if (DOMAINDECOMP(cr) && cr->dd->constraints)
533 nlocat = dd_constraints_nlocalatoms(cr->dd);
535 else if (PARTDECOMP(cr))
537 nlocat = pd_constraints_nlocalatoms(cr->pd);
546 /* Compute normalized i-j vectors */
549 pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
555 for(n=blnr[b]; n<blnr[b+1]; n++)
557 blcc[n] = blmf[n]*iprod(r[b],r[blbnb[n]]);
559 pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
560 mvb = blc[b]*(iprod(r[b],dx) - bllen[b]);
567 /* Compute normalized i-j vectors */
572 tmp0 = x[i][0] - x[j][0];
573 tmp1 = x[i][1] - x[j][1];
574 tmp2 = x[i][2] - x[j][2];
575 rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
579 } /* 16 ncons flops */
590 for(n=blnr[b]; n<blnr[b+1]; n++)
593 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
595 mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
596 tmp1*(xp[i][1] - xp[j][1]) +
597 tmp2*(xp[i][2] - xp[j][2]) - len);
602 /* Together: 26*ncons + 6*nrtot flops */
605 lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
606 /* nrec*(ncons+2*nrtot) flops */
610 mlambda[b] = blc[b]*sol[b];
613 /* Update the coordinates */
614 lincs_update_atoms(lincsd,th,1.0,mlambda,r,invmass,xp);
617 ******** Correction for centripetal effects ********
620 wfac = cos(DEG2RAD*wangle);
623 for(iter=0; iter<lincsd->nIter; iter++)
625 if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints) ||
631 /* Communicate the corrected non-local coordinates */
632 if (DOMAINDECOMP(cr))
634 dd_move_x_constraints(cr->dd,box,xp,NULL);
638 pd_move_x_constraints(cr,xp,NULL);
649 pbc_dx_aiuc(pbc,xp[bla[2*b]],xp[bla[2*b+1]],dx);
653 rvec_sub(xp[bla[2*b]],xp[bla[2*b+1]],dx);
656 dlen2 = 2*len2 - norm2(dx);
657 if (dlen2 < wfac*len2 && (nlocat==NULL || nlocat[b]))
663 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
671 } /* 20*ncons flops */
673 lincs_matrix_expand(lincsd,b0,b1,blcc,rhs1,rhs2,sol);
674 /* nrec*(ncons+2*nrtot) flops */
683 /* Update the coordinates */
684 lincs_update_atoms(lincsd,th,1.0,blc_sol,r,invmass,xp);
686 /* nit*ncons*(37+9*nrec) flops */
690 /* Update the velocities */
691 lincs_update_atoms(lincsd,th,invdt,mlambda,r,invmass,v);
695 if (nlocat != NULL && bCalcLambda)
697 /* In lincs_update_atoms thread might cross-read mlambda */
700 /* Only account for local atoms */
703 mlambda[b] *= 0.5*nlocat[b];
709 /* Constraint virial */
712 tmp0 = -bllen[b]*mlambda[b];
718 vir_r_m_dr[i][j] -= tmp1*r[b][j];
721 } /* 22 ncons flops */
725 * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
726 * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
728 * (26+nrec)*ncons + (6+2*nrec)*nrtot
729 * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
731 * (63+nrec)*ncons + (6+4*nrec)*nrtot
735 void set_lincs_matrix(struct gmx_lincsdata *li,real *invmass,real lambda)
737 int i,a1,a2,n,k,sign,center;
739 const real invsqrt2=0.7071067811865475244;
741 for(i=0; (i<li->nc); i++)
745 li->blc[i] = gmx_invsqrt(invmass[a1] + invmass[a2]);
746 li->blc1[i] = invsqrt2;
749 /* Construct the coupling coefficient matrix blmf */
751 li->ncc_triangle = 0;
752 for(i=0; (i<li->nc); i++)
756 for(n=li->blnr[i]; (n<li->blnr[i+1]); n++)
759 if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
767 if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
777 li->blmf[n] = sign*invmass[center]*li->blc[i]*li->blc[k];
778 li->blmf1[n] = sign*0.5;
779 if (li->ncg_triangle > 0)
781 /* Look for constraint triangles */
782 for(nk=li->blnr[k]; (nk<li->blnr[k+1]); nk++)
785 if (kk != i && kk != k &&
786 (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
788 if (li->ntriangle == 0 ||
789 li->triangle[li->ntriangle-1] < i)
791 /* Add this constraint to the triangle list */
792 li->triangle[li->ntriangle] = i;
793 li->tri_bits[li->ntriangle] = 0;
795 if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
797 gmx_fatal(FARGS,"A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
798 li->blnr[i+1] - li->blnr[i],
799 sizeof(li->tri_bits[0])*8-1);
802 li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
812 fprintf(debug,"Of the %d constraints %d participate in triangles\n",
813 li->nc,li->ntriangle);
814 fprintf(debug,"There are %d couplings of which %d in triangles\n",
815 li->ncc,li->ncc_triangle);
819 * so we know with which lambda value the masses have been set.
824 static int count_triangle_constraints(t_ilist *ilist,t_blocka *at2con)
827 int c0,a00,a01,n1,c1,a10,a11,ac1,n2,c2,a20,a21;
830 t_iatom *ia1,*ia2,*iap;
832 ncon1 = ilist[F_CONSTR].nr/3;
833 ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
835 ia1 = ilist[F_CONSTR].iatoms;
836 ia2 = ilist[F_CONSTRNC].iatoms;
839 for(c0=0; c0<ncon_tot; c0++)
842 iap = constr_iatomptr(ncon1,ia1,ia2,c0);
845 for(n1=at2con->index[a01]; n1<at2con->index[a01+1]; n1++)
850 iap = constr_iatomptr(ncon1,ia1,ia2,c1);
861 for(n2=at2con->index[ac1]; n2<at2con->index[ac1+1]; n2++)
864 if (c2 != c0 && c2 != c1)
866 iap = constr_iatomptr(ncon1,ia1,ia2,c2);
869 if (a20 == a00 || a21 == a00)
883 return ncon_triangle;
886 static gmx_bool more_than_two_sequential_constraints(const t_ilist *ilist,
887 const t_blocka *at2con)
889 t_iatom *ia1,*ia2,*iap;
890 int ncon1,ncon_tot,c;
892 gmx_bool bMoreThanTwoSequentialConstraints;
894 ncon1 = ilist[F_CONSTR].nr/3;
895 ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
897 ia1 = ilist[F_CONSTR].iatoms;
898 ia2 = ilist[F_CONSTRNC].iatoms;
900 bMoreThanTwoSequentialConstraints = FALSE;
901 for(c=0; c<ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
903 iap = constr_iatomptr(ncon1,ia1,ia2,c);
906 /* Check if this constraint has constraints connected at both atoms */
907 if (at2con->index[a1+1] - at2con->index[a1] > 1 &&
908 at2con->index[a2+1] - at2con->index[a2] > 1)
910 bMoreThanTwoSequentialConstraints = TRUE;
914 return bMoreThanTwoSequentialConstraints;
/* qsort() comparator for ints, ascending order. NOTE(review): the plain
 * subtraction assumes the values cannot overflow an int difference; here it
 * is only applied to constraint indices (non-negative and bounded), so this
 * is safe in practice. */
917 static int int_comp(const void *a,const void *b)
919 return (*(int *)a) - (*(int *)b);
922 gmx_lincsdata_t init_lincs(FILE *fplog,gmx_mtop_t *mtop,
923 int nflexcon_global,t_blocka *at2con,
924 gmx_bool bPLINCS,int nIter,int nProjOrder)
926 struct gmx_lincsdata *li;
932 fprintf(fplog,"\nInitializing%s LINear Constraint Solver\n",
933 bPLINCS ? " Parallel" : "");
939 gmx_mtop_ftype_count(mtop,F_CONSTR) +
940 gmx_mtop_ftype_count(mtop,F_CONSTRNC);
941 li->ncg_flex = nflexcon_global;
944 li->nOrder = nProjOrder;
946 li->ncg_triangle = 0;
947 li->bCommIter = FALSE;
948 for(mb=0; mb<mtop->nmolblock; mb++)
950 molt = &mtop->moltype[mtop->molblock[mb].type];
952 mtop->molblock[mb].nmol*
953 count_triangle_constraints(molt->ilist,
954 &at2con[mtop->molblock[mb].type]);
955 if (bPLINCS && li->bCommIter == FALSE)
957 /* Check if we need to communicate not only before LINCS,
958 * but also before each iteration.
959 * The check for only two sequential constraints is only
960 * useful for the common case of H-bond only constraints.
961 * With more effort we could also make it useful for small
962 * molecules with nr. sequential constraints <= nOrder-1.
964 li->bCommIter = (li->nOrder < 1 || more_than_two_sequential_constraints(molt->ilist,&at2con[mtop->molblock[mb].type]));
967 if (debug && bPLINCS)
969 fprintf(debug,"PLINCS communication before each iteration: %d\n",
973 /* LINCS can run on any number of threads.
974 * Currently the number is fixed for the whole simulation,
975 * but it could be set in set_lincs().
977 li->nth = gmx_omp_nthreads_get(emntLINCS);
984 /* Allocate an extra element for "thread-overlap" constraints */
985 snew(li->th,li->nth+1);
989 fprintf(debug,"LINCS: using %d threads\n",li->nth);
992 if (bPLINCS || li->ncg_triangle > 0)
994 please_cite(fplog,"Hess2008a");
998 please_cite(fplog,"Hess97a");
1003 fprintf(fplog,"The number of constraints is %d\n",li->ncg);
1006 fprintf(fplog,"There are inter charge-group constraints,\n"
1007 "will communicate selected coordinates each lincs iteration\n");
1009 if (li->ncg_triangle > 0)
1012 "%d constraints are involved in constraint triangles,\n"
1013 "will apply an additional matrix expansion of order %d for couplings\n"
1014 "between constraints inside triangles\n",
1015 li->ncg_triangle,li->nOrder);
1022 /* Sets up the work division over the threads */
1023 static void lincs_thread_setup(struct gmx_lincsdata *li,int natoms)
1025 lincs_thread_t *li_m;
1030 if (natoms > li->atf_nalloc)
1032 li->atf_nalloc = over_alloc_large(natoms);
1033 srenew(li->atf,li->atf_nalloc);
1037 /* Clear the atom flags */
1038 for(a=0; a<natoms; a++)
1043 for(th=0; th<li->nth; th++)
1045 lincs_thread_t *li_th;
1048 li_th = &li->th[th];
1050 /* The constraints are divided equally over the threads */
1051 li_th->b0 = (li->nc* th )/li->nth;
1052 li_th->b1 = (li->nc*(th+1))/li->nth;
1054 if (th < sizeof(*atf)*8)
1056 /* For each atom set a flag for constraints from each */
1057 for(b=li_th->b0; b<li_th->b1; b++)
1059 atf[li->bla[b*2] ] |= (1U<<th);
1060 atf[li->bla[b*2+1]] |= (1U<<th);
1065 #pragma omp parallel for num_threads(li->nth) schedule(static)
1066 for(th=0; th<li->nth; th++)
1068 lincs_thread_t *li_th;
1072 li_th = &li->th[th];
1074 if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1076 li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1077 srenew(li_th->ind,li_th->ind_nalloc);
1078 srenew(li_th->ind_r,li_th->ind_nalloc);
1081 if (th < sizeof(*atf)*8)
1083 mask = (1U<<th) - 1U;
1087 for(b=li_th->b0; b<li_th->b1; b++)
1089 /* We let the constraint with the lowest thread index
1090 * operate on atoms with constraints from multiple threads.
1092 if (((atf[li->bla[b*2]] & mask) == 0) &&
1093 ((atf[li->bla[b*2+1]] & mask) == 0))
1095 /* Add the constraint to the local atom update index */
1096 li_th->ind[li_th->nind++] = b;
1100 /* Add the constraint to the rest block */
1101 li_th->ind_r[li_th->nind_r++] = b;
1107 /* We are out of bits, assign all constraints to rest */
1108 for(b=li_th->b0; b<li_th->b1; b++)
1110 li_th->ind_r[li_th->nind_r++] = b;
1115 /* We need to copy all constraints which have not been assigned
1116 * to a thread to a separate list which will be handled by one thread.
1118 li_m = &li->th[li->nth];
1121 for(th=0; th<li->nth; th++)
1123 lincs_thread_t *li_th;
1126 li_th = &li->th[th];
1128 if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1130 li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1131 srenew(li_m->ind,li_m->ind_nalloc);
1134 for(b=0; b<li_th->nind_r; b++)
1136 li_m->ind[li_m->nind++] = li_th->ind_r[b];
1141 fprintf(debug,"LINCS thread %d: %d constraints\n",
1148 fprintf(debug,"LINCS thread r: %d constraints\n",
1154 void set_lincs(t_idef *idef,t_mdatoms *md,
1155 gmx_bool bDynamics,t_commrec *cr,
1156 struct gmx_lincsdata *li)
1158 int start,natoms,nflexcon;
1161 int i,k,ncc_alloc,ni,con,nconnect,concon;
1168 /* Zero the thread index ranges.
1169 * Otherwise without local constraints we could return with old ranges.
1171 for(i=0; i<li->nth; i++)
1179 li->th[li->nth].nind = 0;
1182 /* This is the local topology, so there are only F_CONSTR constraints */
1183 if (idef->il[F_CONSTR].nr == 0)
1185 /* There are no constraints,
1186 * we do not need to fill any data structures.
1193 fprintf(debug,"Building the LINCS connectivity\n");
1196 if (DOMAINDECOMP(cr))
1198 if (cr->dd->constraints)
1200 dd_get_constraint_range(cr->dd,&start,&natoms);
1204 natoms = cr->dd->nat_home;
1208 else if(PARTDECOMP(cr))
1210 pd_get_constraint_range(cr->pd,&start,&natoms);
1215 natoms = md->homenr;
1217 at2con = make_at2con(start,natoms,idef->il,idef->iparams,bDynamics,
1221 if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1223 li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1224 srenew(li->bllen0,li->nc_alloc);
1225 srenew(li->ddist,li->nc_alloc);
1226 srenew(li->bla,2*li->nc_alloc);
1227 srenew(li->blc,li->nc_alloc);
1228 srenew(li->blc1,li->nc_alloc);
1229 srenew(li->blnr,li->nc_alloc+1);
1230 srenew(li->bllen,li->nc_alloc);
1231 srenew(li->tmpv,li->nc_alloc);
1232 srenew(li->tmp1,li->nc_alloc);
1233 srenew(li->tmp2,li->nc_alloc);
1234 srenew(li->tmp3,li->nc_alloc);
1235 srenew(li->tmp4,li->nc_alloc);
1236 srenew(li->mlambda,li->nc_alloc);
1237 if (li->ncg_triangle > 0)
1239 /* This is allocating too much, but it is difficult to improve */
1240 srenew(li->triangle,li->nc_alloc);
1241 srenew(li->tri_bits,li->nc_alloc);
1245 iatom = idef->il[F_CONSTR].iatoms;
1247 ncc_alloc = li->ncc_alloc;
1250 ni = idef->il[F_CONSTR].nr/3;
1254 li->blnr[con] = nconnect;
1261 lenA = idef->iparams[type].constr.dA;
1262 lenB = idef->iparams[type].constr.dB;
1263 /* Skip the flexible constraints when not doing dynamics */
1264 if (bDynamics || lenA!=0 || lenB!=0)
1266 li->bllen0[con] = lenA;
1267 li->ddist[con] = lenB - lenA;
1268 /* Set the length to the topology A length */
1269 li->bllen[con] = li->bllen0[con];
1270 li->bla[2*con] = a1;
1271 li->bla[2*con+1] = a2;
1272 /* Construct the constraint connection matrix blbnb */
1273 for(k=at2con.index[a1-start]; k<at2con.index[a1-start+1]; k++)
1275 concon = at2con.a[k];
1278 if (nconnect >= ncc_alloc)
1280 ncc_alloc = over_alloc_small(nconnect+1);
1281 srenew(li->blbnb,ncc_alloc);
1283 li->blbnb[nconnect++] = concon;
1286 for(k=at2con.index[a2-start]; k<at2con.index[a2-start+1]; k++)
1288 concon = at2con.a[k];
1291 if (nconnect+1 > ncc_alloc)
1293 ncc_alloc = over_alloc_small(nconnect+1);
1294 srenew(li->blbnb,ncc_alloc);
1296 li->blbnb[nconnect++] = concon;
1299 li->blnr[con+1] = nconnect;
1303 /* Order the blbnb matrix to optimize memory access */
1304 qsort(&(li->blbnb[li->blnr[con]]),li->blnr[con+1]-li->blnr[con],
1305 sizeof(li->blbnb[0]),int_comp);
1307 /* Increase the constraint count */
1312 done_blocka(&at2con);
1314 /* This is the real number of constraints,
1315 * without dynamics the flexible constraints are not present.
1319 li->ncc = li->blnr[con];
1322 /* Since the matrix is static, we can free some memory */
1323 ncc_alloc = li->ncc;
1324 srenew(li->blbnb,ncc_alloc);
1327 if (ncc_alloc > li->ncc_alloc)
1329 li->ncc_alloc = ncc_alloc;
1330 srenew(li->blmf,li->ncc_alloc);
1331 srenew(li->blmf1,li->ncc_alloc);
1332 srenew(li->tmpncc,li->ncc_alloc);
1337 fprintf(debug,"Number of constraints is %d, couplings %d\n",
1344 li->th[0].b1 = li->nc;
1348 lincs_thread_setup(li,md->nr);
1351 set_lincs_matrix(li,md->invmass,md->lambda);
1354 static void lincs_warning(FILE *fplog,
1355 gmx_domdec_t *dd,rvec *x,rvec *xprime,t_pbc *pbc,
1356 int ncons,int *bla,real *bllen,real wangle,
1357 int maxwarn,int *warncount)
1361 real wfac,d0,d1,cosine;
1364 wfac=cos(DEG2RAD*wangle);
1366 sprintf(buf,"bonds that rotated more than %g degrees:\n"
1367 " atom 1 atom 2 angle previous, current, constraint length\n",
1369 fprintf(stderr,"%s",buf);
1372 fprintf(fplog,"%s",buf);
1375 for(b=0;b<ncons;b++)
1381 pbc_dx_aiuc(pbc,x[i],x[j],v0);
1382 pbc_dx_aiuc(pbc,xprime[i],xprime[j],v1);
1386 rvec_sub(x[i],x[j],v0);
1387 rvec_sub(xprime[i],xprime[j],v1);
1391 cosine = iprod(v0,v1)/(d0*d1);
1394 sprintf(buf," %6d %6d %5.1f %8.4f %8.4f %8.4f\n",
1395 ddglatnr(dd,i),ddglatnr(dd,j),
1396 RAD2DEG*acos(cosine),d0,d1,bllen[b]);
1397 fprintf(stderr,"%s",buf);
1400 fprintf(fplog,"%s",buf);
1402 if (!gmx_isfinite(d1))
1404 gmx_fatal(FARGS,"Bond length not finite.");
1410 if (*warncount > maxwarn)
1412 too_many_constraint_warnings(econtLINCS,*warncount);
1416 static void cconerr(gmx_domdec_t *dd,
1417 int ncons,int *bla,real *bllen,rvec *x,t_pbc *pbc,
1418 real *ncons_loc,real *ssd,real *max,int *imax)
1420 real len,d,ma,ssd2,r2;
1421 int *nlocat,count,b,im;
1424 if (dd && dd->constraints)
1426 nlocat = dd_constraints_nlocalatoms(dd);
1437 for(b=0;b<ncons;b++)
1441 pbc_dx_aiuc(pbc,x[bla[2*b]],x[bla[2*b+1]],dx);
1444 rvec_sub(x[bla[2*b]],x[bla[2*b+1]],dx);
1447 len = r2*gmx_invsqrt(r2);
1448 d = fabs(len/bllen[b]-1);
1449 if (d > ma && (nlocat==NULL || nlocat[b]))
1461 ssd2 += nlocat[b]*d*d;
1466 *ncons_loc = (nlocat ? 0.5 : 1)*count;
1467 *ssd = (nlocat ? 0.5 : 1)*ssd2;
1472 static void dump_conf(gmx_domdec_t *dd,struct gmx_lincsdata *li,
1474 char *name,gmx_bool bAll,rvec *x,matrix box)
1480 dd_get_constraint_range(dd,&ac0,&ac1);
1482 sprintf(str,"%s_%d_%d_%d.pdb",name,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
1483 fp = gmx_fio_fopen(str,"w");
1484 fprintf(fp,"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1 1\n",
1485 10*norm(box[XX]),10*norm(box[YY]),10*norm(box[ZZ]),
1487 for(i=0; i<ac1; i++)
1489 if (i < dd->nat_home || (bAll && i >= ac0 && i < ac1))
1491 fprintf(fp,"%-6s%5u %-4.4s%3.3s %c%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n",
1492 "ATOM",ddglatnr(dd,i),"C","ALA",' ',i+1,
1493 10*x[i][XX],10*x[i][YY],10*x[i][ZZ],
1494 1.0,i<dd->nat_tot ? 0.0 : 1.0);
1499 for(i=0; i<li->nc; i++)
1501 fprintf(fp,"CONECT%5d%5d\n",
1502 ddglatnr(dd,li->bla[2*i]),
1503 ddglatnr(dd,li->bla[2*i+1]));
1509 gmx_bool constrain_lincs(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
1511 gmx_large_int_t step,
1512 struct gmx_lincsdata *lincsd,t_mdatoms *md,
1514 rvec *x,rvec *xprime,rvec *min_proj,
1515 matrix box,t_pbc *pbc,
1516 real lambda,real *dvdlambda,
1518 gmx_bool bCalcVir,tensor vir_r_m_dr,
1521 int maxwarn,int *warncount)
1523 char buf[STRLEN],buf2[22],buf3[STRLEN];
1524 int i,warn,p_imax,error;
1525 real ncons_loc,p_ssd,p_max=0;
1531 if (lincsd->nc == 0 && cr->dd == NULL)
1535 lincsd->rmsd_data[0] = 0;
1536 if (ir->eI == eiSD2 && v == NULL)
1544 lincsd->rmsd_data[i] = 0;
1550 if (econq == econqCoord)
1552 if (ir->efep != efepNO)
1554 if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1556 set_lincs_matrix(lincsd,md->invmass,md->lambda);
1559 for(i=0; i<lincsd->nc; i++)
1561 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1565 if (lincsd->ncg_flex)
1567 /* Set the flexible constraint lengths to the old lengths */
1570 for(i=0; i<lincsd->nc; i++)
1572 if (lincsd->bllen[i] == 0) {
1573 pbc_dx_aiuc(pbc,x[lincsd->bla[2*i]],x[lincsd->bla[2*i+1]],dx);
1574 lincsd->bllen[i] = norm(dx);
1580 for(i=0; i<lincsd->nc; i++)
1582 if (lincsd->bllen[i] == 0)
1585 sqrt(distance2(x[lincsd->bla[2*i]],
1586 x[lincsd->bla[2*i+1]]));
1594 cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1595 &ncons_loc,&p_ssd,&p_max,&p_imax);
1598 /* This warn var can be updated by multiple threads
1599 * at the same time. But as we only need to detect
1600 * if a warning occurred or not, this is not an issue.
1604 /* The OpenMP parallel region of constrain_lincs for coords */
1605 #pragma omp parallel num_threads(lincsd->nth)
1607 int th=gmx_omp_get_thread_num();
1609 clear_mat(lincsd->th[th].vir_r_m_dr);
1611 do_lincs(x,xprime,box,pbc,lincsd,th,
1613 bCalcVir || (ir->efep != efepNO),
1614 ir->LincsWarnAngle,&warn,
1616 th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1619 if (ir->efep != efepNO)
1623 dt_2 = 1.0/(ir->delta_t*ir->delta_t);
1624 for(i=0; (i<lincsd->nc); i++)
1626 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
1631 if (bLog && fplog && lincsd->nc > 0)
1633 fprintf(fplog," Rel. Constraint Deviation: RMS MAX between atoms\n");
1634 fprintf(fplog," Before LINCS %.6f %.6f %6d %6d\n",
1635 sqrt(p_ssd/ncons_loc),p_max,
1636 ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1637 ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1641 cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1642 &ncons_loc,&p_ssd,&p_max,&p_imax);
1643 /* Check if we are doing the second part of SD */
1644 if (ir->eI == eiSD2 && v == NULL)
1652 lincsd->rmsd_data[0] = ncons_loc;
1653 lincsd->rmsd_data[i] = p_ssd;
1657 lincsd->rmsd_data[0] = 0;
1658 lincsd->rmsd_data[1] = 0;
1659 lincsd->rmsd_data[2] = 0;
1661 if (bLog && fplog && lincsd->nc > 0)
1664 " After LINCS %.6f %.6f %6d %6d\n\n",
1665 sqrt(p_ssd/ncons_loc),p_max,
1666 ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1667 ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1674 cconerr(cr->dd,lincsd->nc,lincsd->bla,lincsd->bllen,xprime,pbc,
1675 &ncons_loc,&p_ssd,&p_max,&p_imax);
1678 sprintf(buf3," in simulation %d", cr->ms->sim);
1684 sprintf(buf,"\nStep %s, time %g (ps) LINCS WARNING%s\n"
1685 "relative constraint deviation after LINCS:\n"
1686 "rms %.6f, max %.6f (between atoms %d and %d)\n",
1687 gmx_step_str(step,buf2),ir->init_t+step*ir->delta_t,
1689 sqrt(p_ssd/ncons_loc),p_max,
1690 ddglatnr(cr->dd,lincsd->bla[2*p_imax]),
1691 ddglatnr(cr->dd,lincsd->bla[2*p_imax+1]));
1694 fprintf(fplog,"%s",buf);
1696 fprintf(stderr,"%s",buf);
1697 lincs_warning(fplog,cr->dd,x,xprime,pbc,
1698 lincsd->nc,lincsd->bla,lincsd->bllen,
1699 ir->LincsWarnAngle,maxwarn,warncount);
1701 bOK = (p_max < 0.5);
1704 if (lincsd->ncg_flex) {
1705 for(i=0; (i<lincsd->nc); i++)
1706 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1707 lincsd->bllen[i] = 0;
1712 /* The OpenMP parallel region of constrain_lincs for derivatives */
1713 #pragma omp parallel num_threads(lincsd->nth)
1715 int th=gmx_omp_get_thread_num();
1717 do_lincsp(x,xprime,min_proj,pbc,lincsd,th,
1718 md->invmass,econq,ir->efep != efepNO ? dvdlambda : NULL,
1719 bCalcVir,th==0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1723 if (bCalcVir && lincsd->nth > 1)
1725 for(i=1; i<lincsd->nth; i++)
1727 m_add(vir_r_m_dr,lincsd->th[i].vir_r_m_dr,vir_r_m_dr);
1731 /* count assuming nit=1 */
1732 inc_nrnb(nrnb,eNR_LINCS,lincsd->nc);
1733 inc_nrnb(nrnb,eNR_LINCSMAT,(2+lincsd->nOrder)*lincsd->ncc);
1734 if (lincsd->ntriangle > 0)
1736 inc_nrnb(nrnb,eNR_LINCSMAT,lincsd->nOrder*lincsd->ncc_triangle);
1740 inc_nrnb(nrnb,eNR_CONSTR_V,lincsd->nc*2);
1744 inc_nrnb(nrnb,eNR_CONSTR_VIR,lincsd->nc);