2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 /* This file is completely threadsafe - keep it that way! */
49 #include "gromacs/utility/smalloc.h"
53 #include "mtop_util.h"
54 #include "gmx_omp_nthreads.h"
56 #include "gromacs/fileio/gmxfio.h"
57 #include "gmx_fatal.h"
58 #include "gromacs/utility/gmxomp.h"
/* NOTE(review): these fields appear to be the interior of the per-thread
 * LINCS work-division struct (the enclosing typedef line is not visible in
 * this excerpt) -- confirm against the full source before relying on this. */
61 int b0; /* first constraint for this thread */
62 int b1; /* b1-1 is the last constraint for this thread */
63 int nind; /* number of indices */
64 int *ind; /* constraint index for updating atom data */
65 int nind_r; /* number of indices */
66 int *ind_r; /* constraint index for updating atom data */
67 int ind_nalloc; /* allocation size of ind and ind_r */
68 tensor vir_r_m_dr; /* temporary variable for virial calculation */
69 real dhdlambda; /* temporary variable for lambda derivative */
/* Main LINCS data structure: constraint topology (atom pairs, reference
 * lengths, connection matrix), mass-weighting factors, triangle-constraint
 * bookkeeping, per-thread division and temporary work arrays.
 * NOTE(review): several members (e.g. rmsd_data, tmpv/tmp1..tmp4, tmpncc)
 * are referenced elsewhere in this file but their declarations are missing
 * from this excerpt. */
72 typedef struct gmx_lincsdata {
73 int ncg; /* the global number of constraints */
74 int ncg_flex; /* the global number of flexible constraints */
75 int ncg_triangle; /* the global number of constraints in triangles */
76 int nIter; /* the number of iterations */
77 int nOrder; /* the order of the matrix expansion */
78 int nc; /* the number of constraints */
79 int nc_alloc; /* the number we allocated memory for */
80 int ncc; /* the number of constraint connections */
81 int ncc_alloc; /* the number we allocated memory for */
82 real matlam; /* the FE lambda value used for filling blc and blmf */
83 real *bllen0; /* the reference distance in topology A */
84 real *ddist; /* the reference distance in top B - the r.d. in top A */
85 int *bla; /* the atom pairs involved in the constraints */
86 real *blc; /* 1/sqrt(invmass1 + invmass2) */
87 real *blc1; /* as blc, but with all masses 1 */
88 int *blnr; /* index into blbnb and blmf */
89 int *blbnb; /* list of constraint connections */
90 int ntriangle; /* the local number of constraints in triangles */
91 int *triangle; /* the list of triangle constraints */
92 int *tri_bits; /* the bits tell if the matrix element should be used */
93 int ncc_triangle; /* the number of constraint connections in triangles */
94 gmx_bool bCommIter; /* communicate before each LINCS interation */
95 real *blmf; /* matrix of mass factors for constraint connections */
96 real *blmf1; /* as blmf, but with all masses 1 */
97 real *bllen; /* the reference bond length */
98 int nth; /* The number of threads doing LINCS */
99 lincs_thread_t *th; /* LINCS thread division */
100 unsigned *atf; /* atom flags for thread parallelization */
101 int atf_nalloc; /* allocation size of atf */
102 /* arrays for temporary storage in the LINCS algorithm */
109 real *mlambda; /* the Lagrange multipliers * -1 */
110 /* storage for the constraint RMS relative deviation output */
/* Returns the internal rmsd_data array of the LINCS data, used by callers
 * to read the stored constraint RMS deviation accumulators. */
114 real *lincs_rmsd_data(struct gmx_lincsdata *lincsd)
116 return lincsd->rmsd_data;
/* Returns the constraint RMS relative deviation: sqrt(ssd/count) where
 * rmsd_data[0] is the (weighted) constraint count and element 1 or 2 holds
 * the sum of squared deviations (bSD2 selects the SD2 accumulator).
 * NOTE(review): the branch taken when rmsd_data[0] == 0 is not visible in
 * this excerpt -- presumably it returns 0; confirm in the full source. */
119 real lincs_rmsd(struct gmx_lincsdata *lincsd, gmx_bool bSD2)
121 if (lincsd->rmsd_data[0] > 0)
123 return sqrt(lincsd->rmsd_data[bSD2 ? 2 : 1]/lincsd->rmsd_data[0]);
131 /* Do a set of nrec LINCS matrix multiplications.
132 * This function will return with up to date thread-local
133 * constraint data, without an OpenMP barrier.
 *
 * Applies the truncated power-series expansion of (I - A)^-1: each of the
 * lincsd->nOrder recursions multiplies the current right-hand side by the
 * sparse coupling matrix blcc (indexed through blnr/blbnb) and accumulates
 * into sol.  A second pass of extra recursions is then done for constraints
 * in rigid triangles only, because their coupling eigenvalues are larger
 * (see comment below).
 * NOTE(review): the b0/b1 parameters and several statements (rhs swaps,
 * barriers) are missing from this excerpt; behavior notes here are partial.
133 */
135 static void lincs_matrix_expand(const struct gmx_lincsdata *lincsd,
138 real *rhs1, real *rhs2, real *sol)
140 int nrec, rec, b, j, n, nr0, nr1;
142 int ntriangle, tb, bits;
143 const int *blnr = lincsd->blnr, *blbnb = lincsd->blbnb;
144 const int *triangle = lincsd->triangle, *tri_bits = lincsd->tri_bits;
146 ntriangle = lincsd->ntriangle;
147 nrec = lincsd->nOrder;
149 for (rec = 0; rec < nrec; rec++)
152 for (b = b0; b < b1; b++)
155 for (n = blnr[b]; n < blnr[b+1]; n++)
158 mvb = mvb + blcc[n]*rhs1[j];
161 sol[b] = sol[b] + mvb;
166 } /* nrec*(ncons+2*nrtot) flops */
170 /* Perform an extra nrec recursions for only the constraints
171 * involved in rigid triangles.
172 * In this way their accuracy should come close to those of the other
173 * constraints, since traingles of constraints can produce eigenvalues
174 * around 0.7, while the effective eigenvalue for bond constraints
175 * is around 0.4 (and 0.7*0.7=0.5).
177 /* We need to copy the temporary array, since only the elements
178 * for constraints involved in triangles are updated and then
179 * the pointers are swapped. This saving copying the whole arrary.
180 * We need barrier as other threads might still be reading from rhs2.
183 for (b = b0; b < b1; b++)
190 for (rec = 0; rec < nrec; rec++)
192 for (tb = 0; tb < ntriangle; tb++)
199 for (n = nr0; n < nr1; n++)
201 if (bits & (1<<(n-nr0)))
204 mvb = mvb + blcc[n]*rhs1[j];
208 sol[b] = sol[b] + mvb;
214 } /* flops count is missing here */
216 /* We need a barrier here as the calling routine will continue
217 * to operate on the thread-local constraints without barrier.
/* Applies the constraint corrections fac[b] along direction vectors r[b] to
 * the atom positions x for ALL ncons constraints (single-threaded path, no
 * thread-local index list needed).  The two loops presumably correspond to
 * the invmass != NULL and invmass == NULL (all masses 1) cases -- most of
 * both loop bodies are missing from this excerpt; confirm in full source. */
223 static void lincs_update_atoms_noind(int ncons, const int *bla,
225 const real *fac, rvec *r,
230 real mvb, im1, im2, tmp0, tmp1, tmp2;
234 for (b = 0; b < ncons; b++)
250 } /* 16 ncons flops */
254 for (b = 0; b < ncons; b++)
/* As lincs_update_atoms_noind, but only for the ncons constraints listed in
 * the index array ind -- this is the thread-parallel path where each thread
 * updates only atoms in its own index set.  Loop bodies are largely missing
 * from this excerpt. */
272 static void lincs_update_atoms_ind(int ncons, const int *ind, const int *bla,
274 const real *fac, rvec *r,
279 real mvb, im1, im2, tmp0, tmp1, tmp2;
283 for (bi = 0; bi < ncons; bi++)
300 } /* 16 ncons flops */
304 for (bi = 0; bi < ncons; bi++)
319 } /* 16 ncons flops */
/* Dispatcher for applying constraint corrections to atoms:
 * - 1 thread: update everything via lincs_update_atoms_noind;
 * - >1 thread: each thread updates its own local constraints without a
 *   barrier, then the constraints spanning multiple thread atom blocks
 *   (stored in the extra slot li->th[li->nth]) are done on one thread. */
323 static void lincs_update_atoms(struct gmx_lincsdata *li, int th,
325 const real *fac, rvec *r,
331 /* Single thread, we simply update for all constraints */
332 lincs_update_atoms_noind(li->nc, li->bla, prefac, fac, r, invmass, x);
336 /* Update the atom vector components for our thread local
337 * constraints that only access our local atom range.
338 * This can be done without a barrier.
340 lincs_update_atoms_ind(li->th[th].nind, li->th[th].ind,
341 li->bla, prefac, fac, r, invmass, x);
343 if (li->th[li->nth].nind > 0)
345 /* Update the constraints that operate on atoms
346 * in multiple thread atom blocks on the master thread.
351 lincs_update_atoms_ind(li->th[li->nth].nind,
353 li->bla, prefac, fac, r, invmass, x);
359 /* LINCS projection, works on derivatives of the coordinates */
/* Projects out the constraint components of the derivative vector f into fp
 * for this thread's constraint range [b0,b1).  Steps visible in this
 * excerpt: build normalized constraint direction vectors (with or without
 * PBC), fill the coupling matrix blcc and right-hand side, expand via
 * lincs_matrix_expand, optionally mask out non-flexible constraints
 * (econqDeriv_FlexCon), update atoms, and optionally accumulate dH/dlambda
 * and the constraint virial rmdf.  Mass weighting is skipped for
 * econq == econqForce (blc1/blmf1 used, invmass passed as NULL). */
360 static void do_lincsp(rvec *x, rvec *f, rvec *fp, t_pbc *pbc,
361 struct gmx_lincsdata *lincsd, int th,
363 int econq, gmx_bool bCalcDHDL,
364 gmx_bool bCalcVir, tensor rmdf)
366 int b0, b1, b, i, j, k, n;
367 real tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, wfac, lam;
369 int *bla, *blnr, *blbnb;
371 real *blc, *blmf, *blcc, *rhs1, *rhs2, *sol;
373 b0 = lincsd->th[th].b0;
374 b1 = lincsd->th[th].b1;
379 blbnb = lincsd->blbnb;
380 if (econq != econqForce)
382 /* Use mass-weighted parameters */
388 /* Use non mass-weighted parameters */
390 blmf = lincsd->blmf1;
392 blcc = lincsd->tmpncc;
397 /* Compute normalized i-j vectors */
400 for (b = b0; b < b1; b++)
402 pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
408 for (b = b0; b < b1; b++)
410 rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
412 } /* 16 ncons flops */
416 for (b = b0; b < b1; b++)
423 for (n = blnr[b]; n < blnr[b+1]; n++)
426 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
428 mvb = blc[b]*(tmp0*(f[i][0] - f[j][0]) +
429 tmp1*(f[i][1] - f[j][1]) +
430 tmp2*(f[i][2] - f[j][2]));
435 /* Together: 23*ncons + 6*nrtot flops */
437 lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
438 /* nrec*(ncons+2*nrtot) flops */
440 if (econq == econqDeriv_FlexCon)
442 /* We only want to constraint the flexible constraints,
443 * so we mask out the normal ones by setting sol to 0.
445 for (b = b0; b < b1; b++)
447 if (!(lincsd->bllen0[b] == 0 && lincsd->ddist[b] == 0))
454 /* We multiply sol by blc, so we can use lincs_update_atoms for OpenMP */
455 for (b = b0; b < b1; b++)
460 /* When constraining forces, we should not use mass weighting,
461 * so we pass invmass=NULL, which results in the use of 1 for all atoms.
463 lincs_update_atoms(lincsd, th, 1.0, sol, r,
464 (econq != econqForce) ? invmass : NULL, fp);
471 for (b = b0; b < b1; b++)
473 dhdlambda -= sol[b]*lincsd->ddist[b];
476 lincsd->th[th].dhdlambda = dhdlambda;
481 /* Constraint virial,
482 * determines sum r_bond x delta f,
483 * where delta f is the constraint correction
484 * of the quantity that is being constrained.
486 for (b = b0; b < b1; b++)
488 mvb = lincsd->bllen[b]*sol[b];
489 for (i = 0; i < DIM; i++)
492 for (j = 0; j < DIM; j++)
494 rmdf[i][j] += tmp1*r[b][j];
497 } /* 23 ncons flops */
/* The core LINCS coordinate-constraining routine for this thread's range
 * [b0,b1): builds normalized constraint vectors and the coupling matrix,
 * solves for the Lagrange multipliers via the matrix expansion, updates the
 * coordinates xp, then performs nIter iterations of the centripetal (bond
 * rotation) correction, optionally communicating non-local coordinates each
 * iteration under domain decomposition.  Finally updates velocities, and
 * optionally accumulates dH/dlambda and the constraint virial.  The warn
 * counter is set when a bond rotated more than wangle degrees
 * (dlen2 < wfac*len2 test). */
501 static void do_lincs(rvec *x, rvec *xp, matrix box, t_pbc *pbc,
502 struct gmx_lincsdata *lincsd, int th,
506 real wangle, int *warn,
508 gmx_bool bCalcVir, tensor vir_r_m_dr)
510 int b0, b1, b, i, j, k, n, iter;
511 real tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, len2, dlen2, wfac;
513 int *bla, *blnr, *blbnb;
515 real *blc, *blmf, *bllen, *blcc, *rhs1, *rhs2, *sol, *blc_sol, *mlambda;
518 b0 = lincsd->th[th].b0;
519 b1 = lincsd->th[th].b1;
524 blbnb = lincsd->blbnb;
527 bllen = lincsd->bllen;
528 blcc = lincsd->tmpncc;
532 blc_sol = lincsd->tmp4;
533 mlambda = lincsd->mlambda;
535 if (DOMAINDECOMP(cr) && cr->dd->constraints)
537 nlocat = dd_constraints_nlocalatoms(cr->dd);
546 /* Compute normalized i-j vectors */
547 for (b = b0; b < b1; b++)
549 pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
553 for (b = b0; b < b1; b++)
555 for (n = blnr[b]; n < blnr[b+1]; n++)
557 blcc[n] = blmf[n]*iprod(r[b], r[blbnb[n]]);
559 pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
560 mvb = blc[b]*(iprod(r[b], dx) - bllen[b]);
567 /* Compute normalized i-j vectors */
568 for (b = b0; b < b1; b++)
572 tmp0 = x[i][0] - x[j][0];
573 tmp1 = x[i][1] - x[j][1];
574 tmp2 = x[i][2] - x[j][2];
575 rlen = gmx_invsqrt(tmp0*tmp0+tmp1*tmp1+tmp2*tmp2);
579 } /* 16 ncons flops */
582 for (b = b0; b < b1; b++)
590 for (n = blnr[b]; n < blnr[b+1]; n++)
593 blcc[n] = blmf[n]*(tmp0*r[k][0] + tmp1*r[k][1] + tmp2*r[k][2]);
595 mvb = blc[b]*(tmp0*(xp[i][0] - xp[j][0]) +
596 tmp1*(xp[i][1] - xp[j][1]) +
597 tmp2*(xp[i][2] - xp[j][2]) - len);
602 /* Together: 26*ncons + 6*nrtot flops */
605 lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
606 /* nrec*(ncons+2*nrtot) flops */
608 for (b = b0; b < b1; b++)
610 mlambda[b] = blc[b]*sol[b];
613 /* Update the coordinates */
614 lincs_update_atoms(lincsd, th, 1.0, mlambda, r, invmass, xp);
617 ******** Correction for centripetal effects ********
620 wfac = cos(DEG2RAD*wangle);
623 for (iter = 0; iter < lincsd->nIter; iter++)
625 if ((lincsd->bCommIter && DOMAINDECOMP(cr) && cr->dd->constraints))
630 /* Communicate the corrected non-local coordinates */
631 if (DOMAINDECOMP(cr))
633 dd_move_x_constraints(cr->dd, box, xp, NULL, FALSE);
639 for (b = b0; b < b1; b++)
644 pbc_dx_aiuc(pbc, xp[bla[2*b]], xp[bla[2*b+1]], dx);
648 rvec_sub(xp[bla[2*b]], xp[bla[2*b+1]], dx);
651 dlen2 = 2*len2 - norm2(dx);
652 if (dlen2 < wfac*len2 && (nlocat == NULL || nlocat[b]))
658 mvb = blc[b]*(len - dlen2*gmx_invsqrt(dlen2));
666 } /* 20*ncons flops */
668 lincs_matrix_expand(lincsd, b0, b1, blcc, rhs1, rhs2, sol);
669 /* nrec*(ncons+2*nrtot) flops */
671 for (b = b0; b < b1; b++)
678 /* Update the coordinates */
679 lincs_update_atoms(lincsd, th, 1.0, blc_sol, r, invmass, xp);
681 /* nit*ncons*(37+9*nrec) flops */
685 /* Update the velocities */
686 lincs_update_atoms(lincsd, th, invdt, mlambda, r, invmass, v);
690 if (nlocat != NULL && (bCalcDHDL || bCalcVir))
692 /* In lincs_update_atoms threads might cross-read mlambda */
695 /* Only account for local atoms */
696 for (b = b0; b < b1; b++)
698 mlambda[b] *= 0.5*nlocat[b];
707 for (b = b0; b < b1; b++)
709 /* Note that this this is dhdl*dt^2, the dt^2 factor is corrected
710 * later after the contributions are reduced over the threads.
712 dhdl -= lincsd->mlambda[b]*lincsd->ddist[b];
714 lincsd->th[th].dhdlambda = dhdl;
719 /* Constraint virial */
720 for (b = b0; b < b1; b++)
722 tmp0 = -bllen[b]*mlambda[b];
723 for (i = 0; i < DIM; i++)
726 for (j = 0; j < DIM; j++)
728 vir_r_m_dr[i][j] -= tmp1*r[b][j];
731 } /* 22 ncons flops */
735 * 26*ncons + 6*nrtot + nrec*(ncons+2*nrtot)
736 * + nit * (20*ncons + nrec*(ncons+2*nrtot) + 17 ncons)
738 * (26+nrec)*ncons + (6+2*nrec)*nrtot
739 * + nit * ((37+nrec)*ncons + 2*nrec*nrtot)
741 * (63+nrec)*ncons + (6+4*nrec)*nrtot
/* Fills the mass-weighting arrays blc/blc1 and the coupling-coefficient
 * matrices blmf/blmf1 from the current inverse masses, and detects
 * constraint triangles (recording them in triangle[]/tri_bits[]).  The sign
 * of each coupling depends on whether the shared atom is at the same end of
 * both constraints.  The lambda argument is stored (see trailing comment)
 * so the code can tell which FE lambda the masses correspond to. */
745 void set_lincs_matrix(struct gmx_lincsdata *li, real *invmass, real lambda)
747 int i, a1, a2, n, k, sign, center;
749 const real invsqrt2 = 0.7071067811865475244;
751 for (i = 0; (i < li->nc); i++)
755 li->blc[i] = gmx_invsqrt(invmass[a1] + invmass[a2]);
756 li->blc1[i] = invsqrt2;
759 /* Construct the coupling coefficient matrix blmf */
761 li->ncc_triangle = 0;
762 for (i = 0; (i < li->nc); i++)
766 for (n = li->blnr[i]; (n < li->blnr[i+1]); n++)
769 if (a1 == li->bla[2*k] || a2 == li->bla[2*k+1])
777 if (a1 == li->bla[2*k] || a1 == li->bla[2*k+1])
787 li->blmf[n] = sign*invmass[center]*li->blc[i]*li->blc[k];
788 li->blmf1[n] = sign*0.5;
789 if (li->ncg_triangle > 0)
791 /* Look for constraint triangles */
792 for (nk = li->blnr[k]; (nk < li->blnr[k+1]); nk++)
795 if (kk != i && kk != k &&
796 (li->bla[2*kk] == end || li->bla[2*kk+1] == end))
798 if (li->ntriangle == 0 ||
799 li->triangle[li->ntriangle-1] < i)
801 /* Add this constraint to the triangle list */
802 li->triangle[li->ntriangle] = i;
803 li->tri_bits[li->ntriangle] = 0;
805 if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
807 gmx_fatal(FARGS, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
808 li->blnr[i+1] - li->blnr[i],
809 sizeof(li->tri_bits[0])*8-1);
812 li->tri_bits[li->ntriangle-1] |= (1<<(n-li->blnr[i]));
822 fprintf(debug, "Of the %d constraints %d participate in triangles\n",
823 li->nc, li->ntriangle);
824 fprintf(debug, "There are %d couplings of which %d in triangles\n",
825 li->ncc, li->ncc_triangle);
829 * so we know with which lambda value the masses have been set.
/* Counts how many constraints participate in triangles of constraints
 * (three constraints whose atoms form a closed loop) for one molecule type,
 * by walking the atom-to-constraint mapping at2con two hops out from each
 * constraint and checking whether it returns to the starting atoms.
 * Handles both F_CONSTR and F_CONSTRNC interaction lists. */
834 static int count_triangle_constraints(t_ilist *ilist, t_blocka *at2con)
837 int c0, a00, a01, n1, c1, a10, a11, ac1, n2, c2, a20, a21;
840 t_iatom *ia1, *ia2, *iap;
842 ncon1 = ilist[F_CONSTR].nr/3;
843 ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
845 ia1 = ilist[F_CONSTR].iatoms;
846 ia2 = ilist[F_CONSTRNC].iatoms;
849 for (c0 = 0; c0 < ncon_tot; c0++)
852 iap = constr_iatomptr(ncon1, ia1, ia2, c0);
855 for (n1 = at2con->index[a01]; n1 < at2con->index[a01+1]; n1++)
860 iap = constr_iatomptr(ncon1, ia1, ia2, c1);
871 for (n2 = at2con->index[ac1]; n2 < at2con->index[ac1+1]; n2++)
874 if (c2 != c0 && c2 != c1)
876 iap = constr_iatomptr(ncon1, ia1, ia2, c2);
879 if (a20 == a00 || a21 == a00)
893 return ncon_triangle;
/* Returns TRUE when any constraint in the molecule type has further
 * constraints attached at BOTH of its atoms, i.e. there is a chain of more
 * than two sequential constraints.  Used to decide whether parallel LINCS
 * must communicate before every iteration. */
896 static gmx_bool more_than_two_sequential_constraints(const t_ilist *ilist,
897 const t_blocka *at2con)
899 t_iatom *ia1, *ia2, *iap;
900 int ncon1, ncon_tot, c;
902 gmx_bool bMoreThanTwoSequentialConstraints;
904 ncon1 = ilist[F_CONSTR].nr/3;
905 ncon_tot = ncon1 + ilist[F_CONSTRNC].nr/3;
907 ia1 = ilist[F_CONSTR].iatoms;
908 ia2 = ilist[F_CONSTRNC].iatoms;
910 bMoreThanTwoSequentialConstraints = FALSE;
911 for (c = 0; c < ncon_tot && !bMoreThanTwoSequentialConstraints; c++)
913 iap = constr_iatomptr(ncon1, ia1, ia2, c);
916 /* Check if this constraint has constraints connected at both atoms */
917 if (at2con->index[a1+1] - at2con->index[a1] > 1 &&
918 at2con->index[a2+1] - at2con->index[a2] > 1)
920 bMoreThanTwoSequentialConstraints = TRUE;
924 return bMoreThanTwoSequentialConstraints;
/* qsort() comparator for ints, used to order the blbnb coupling matrix
 * rows for better memory access.
 *
 * The original returned (*a - *b), which is undefined behavior when the
 * difference overflows int (e.g. INT_MAX vs. a negative value) and can
 * yield a wrong sign, corrupting the sort order.  Returning the sign via
 * two comparisons is always well defined and gives the same ordering for
 * all inputs.
 */
static int int_comp(const void *a, const void *b)
{
    const int ia = *(const int *)a;
    const int ib = *(const int *)b;

    return (ia > ib) - (ia < ib);
}
/* Allocates and initializes the global LINCS data from the full topology:
 * global constraint counts, triangle-constraint count per molecule block,
 * whether parallel LINCS must communicate each iteration (bCommIter), the
 * number of OpenMP threads, and the per-thread division array (with one
 * extra slot for constraints overlapping thread atom blocks).  Also prints
 * citations and summary information to fplog. */
932 gmx_lincsdata_t init_lincs(FILE *fplog, gmx_mtop_t *mtop,
933 int nflexcon_global, t_blocka *at2con,
934 gmx_bool bPLINCS, int nIter, int nProjOrder)
936 struct gmx_lincsdata *li;
942 fprintf(fplog, "\nInitializing%s LINear Constraint Solver\n",
943 bPLINCS ? " Parallel" : "");
949 gmx_mtop_ftype_count(mtop, F_CONSTR) +
950 gmx_mtop_ftype_count(mtop, F_CONSTRNC);
951 li->ncg_flex = nflexcon_global;
954 li->nOrder = nProjOrder;
956 li->ncg_triangle = 0;
957 li->bCommIter = FALSE;
958 for (mb = 0; mb < mtop->nmolblock; mb++)
960 molt = &mtop->moltype[mtop->molblock[mb].type];
962 mtop->molblock[mb].nmol*
963 count_triangle_constraints(molt->ilist,
964 &at2con[mtop->molblock[mb].type]);
965 if (bPLINCS && li->bCommIter == FALSE)
967 /* Check if we need to communicate not only before LINCS,
968 * but also before each iteration.
969 * The check for only two sequential constraints is only
970 * useful for the common case of H-bond only constraints.
971 * With more effort we could also make it useful for small
972 * molecules with nr. sequential constraints <= nOrder-1.
974 li->bCommIter = (li->nOrder < 1 || more_than_two_sequential_constraints(molt->ilist, &at2con[mtop->molblock[mb].type]));
977 if (debug && bPLINCS)
979 fprintf(debug, "PLINCS communication before each iteration: %d\n",
983 /* LINCS can run on any number of threads.
984 * Currently the number is fixed for the whole simulation,
985 * but it could be set in set_lincs().
987 li->nth = gmx_omp_nthreads_get(emntLINCS);
994 /* Allocate an extra elements for "thread-overlap" constraints */
995 snew(li->th, li->nth+1);
999 fprintf(debug, "LINCS: using %d threads\n", li->nth);
1002 if (bPLINCS || li->ncg_triangle > 0)
1004 please_cite(fplog, "Hess2008a");
1008 please_cite(fplog, "Hess97a");
1013 fprintf(fplog, "The number of constraints is %d\n", li->ncg);
1016 fprintf(fplog, "There are inter charge-group constraints,\n"
1017 "will communicate selected coordinates each lincs iteration\n");
1019 if (li->ncg_triangle > 0)
1022 "%d constraints are involved in constraint triangles,\n"
1023 "will apply an additional matrix expansion of order %d for couplings\n"
1024 "between constraints inside triangles\n",
1025 li->ncg_triangle, li->nOrder);
1032 /* Sets up the work division over the threads */
/* Divides the nc constraints equally over li->nth threads, then classifies
 * each constraint: if both of its atoms are touched only by threads with
 * index >= this thread (checked via per-atom bit flags in li->atf), it goes
 * into the thread's local index list (ind); otherwise into the "rest" list
 * (ind_r).  All rest-list constraints are then concatenated into the extra
 * slot li->th[li->nth], which one thread handles.  Threads beyond the bit
 * width of the flag word put everything in the rest list. */
1033 static void lincs_thread_setup(struct gmx_lincsdata *li, int natoms)
1035 lincs_thread_t *li_m;
1040 if (natoms > li->atf_nalloc)
1042 li->atf_nalloc = over_alloc_large(natoms);
1043 srenew(li->atf, li->atf_nalloc);
1047 /* Clear the atom flags */
1048 for (a = 0; a < natoms; a++)
1053 for (th = 0; th < li->nth; th++)
1055 lincs_thread_t *li_th;
1058 li_th = &li->th[th];
1060 /* The constraints are divided equally over the threads */
1061 li_th->b0 = (li->nc* th )/li->nth;
1062 li_th->b1 = (li->nc*(th+1))/li->nth;
1064 if (th < sizeof(*atf)*8)
1066 /* For each atom set a flag for constraints from each */
1067 for (b = li_th->b0; b < li_th->b1; b++)
1069 atf[li->bla[b*2] ] |= (1U<<th);
1070 atf[li->bla[b*2+1]] |= (1U<<th);
1075 #pragma omp parallel for num_threads(li->nth) schedule(static)
1076 for (th = 0; th < li->nth; th++)
1078 lincs_thread_t *li_th;
1082 li_th = &li->th[th];
1084 if (li_th->b1 - li_th->b0 > li_th->ind_nalloc)
1086 li_th->ind_nalloc = over_alloc_large(li_th->b1-li_th->b0);
1087 srenew(li_th->ind, li_th->ind_nalloc);
1088 srenew(li_th->ind_r, li_th->ind_nalloc);
1091 if (th < sizeof(*atf)*8)
1093 mask = (1U<<th) - 1U;
1097 for (b = li_th->b0; b < li_th->b1; b++)
1099 /* We let the constraint with the lowest thread index
1100 * operate on atoms with constraints from multiple threads.
1102 if (((atf[li->bla[b*2]] & mask) == 0) &&
1103 ((atf[li->bla[b*2+1]] & mask) == 0))
1105 /* Add the constraint to the local atom update index */
1106 li_th->ind[li_th->nind++] = b;
1110 /* Add the constraint to the rest block */
1111 li_th->ind_r[li_th->nind_r++] = b;
1117 /* We are out of bits, assign all constraints to rest */
1118 for (b = li_th->b0; b < li_th->b1; b++)
1120 li_th->ind_r[li_th->nind_r++] = b;
1125 /* We need to copy all constraints which have not be assigned
1126 * to a thread to a separate list which will be handled by one thread.
1128 li_m = &li->th[li->nth];
1131 for (th = 0; th < li->nth; th++)
1133 lincs_thread_t *li_th;
1136 li_th = &li->th[th];
1138 if (li_m->nind + li_th->nind_r > li_m->ind_nalloc)
1140 li_m->ind_nalloc = over_alloc_large(li_m->nind+li_th->nind_r);
1141 srenew(li_m->ind, li_m->ind_nalloc);
1144 for (b = 0; b < li_th->nind_r; b++)
1146 li_m->ind[li_m->nind++] = li_th->ind_r[b];
1151 fprintf(debug, "LINCS thread %d: %d constraints\n",
1158 fprintf(debug, "LINCS thread r: %d constraints\n",
/* (Re)builds the local LINCS constraint data from the local topology idef:
 * allocates/reallocates the per-constraint arrays, fills the atom pairs
 * (bla), reference lengths (bllen0/ddist/bllen) and the connectivity matrix
 * blbnb via the atom-to-constraint map, sorts each row of blbnb for memory
 * locality, skips flexible constraints when not doing dynamics, then sets
 * up the thread division and the coupling matrix.  Called after each domain
 * (re)partitioning. */
1164 void set_lincs(t_idef *idef, t_mdatoms *md,
1165 gmx_bool bDynamics, t_commrec *cr,
1166 struct gmx_lincsdata *li)
1168 int start, natoms, nflexcon;
1171 int i, k, ncc_alloc, ni, con, nconnect, concon;
1173 real lenA = 0, lenB;
1178 /* Zero the thread index ranges.
1179 * Otherwise without local constraints we could return with old ranges.
1181 for (i = 0; i < li->nth; i++)
1189 li->th[li->nth].nind = 0;
1192 /* This is the local topology, so there are only F_CONSTR constraints */
1193 if (idef->il[F_CONSTR].nr == 0)
1195 /* There are no constraints,
1196 * we do not need to fill any data structures.
1203 fprintf(debug, "Building the LINCS connectivity\n");
1206 if (DOMAINDECOMP(cr))
1208 if (cr->dd->constraints)
1210 dd_get_constraint_range(cr->dd, &start, &natoms);
1214 natoms = cr->dd->nat_home;
1221 natoms = md->homenr;
1223 at2con = make_at2con(start, natoms, idef->il, idef->iparams, bDynamics,
1227 if (idef->il[F_CONSTR].nr/3 > li->nc_alloc || li->nc_alloc == 0)
1229 li->nc_alloc = over_alloc_dd(idef->il[F_CONSTR].nr/3);
1230 srenew(li->bllen0, li->nc_alloc);
1231 srenew(li->ddist, li->nc_alloc);
1232 srenew(li->bla, 2*li->nc_alloc);
1233 srenew(li->blc, li->nc_alloc);
1234 srenew(li->blc1, li->nc_alloc);
1235 srenew(li->blnr, li->nc_alloc+1);
1236 srenew(li->bllen, li->nc_alloc);
1237 srenew(li->tmpv, li->nc_alloc);
1238 srenew(li->tmp1, li->nc_alloc);
1239 srenew(li->tmp2, li->nc_alloc);
1240 srenew(li->tmp3, li->nc_alloc);
1241 srenew(li->tmp4, li->nc_alloc);
1242 srenew(li->mlambda, li->nc_alloc);
1243 if (li->ncg_triangle > 0)
1245 /* This is allocating too much, but it is difficult to improve */
1246 srenew(li->triangle, li->nc_alloc);
1247 srenew(li->tri_bits, li->nc_alloc);
1251 iatom = idef->il[F_CONSTR].iatoms;
1253 ncc_alloc = li->ncc_alloc;
1256 ni = idef->il[F_CONSTR].nr/3;
1260 li->blnr[con] = nconnect;
1261 for (i = 0; i < ni; i++)
1267 lenA = idef->iparams[type].constr.dA;
1268 lenB = idef->iparams[type].constr.dB;
1269 /* Skip the flexible constraints when not doing dynamics */
1270 if (bDynamics || lenA != 0 || lenB != 0)
1272 li->bllen0[con] = lenA;
1273 li->ddist[con] = lenB - lenA;
1274 /* Set the length to the topology A length */
1275 li->bllen[con] = li->bllen0[con];
1276 li->bla[2*con] = a1;
1277 li->bla[2*con+1] = a2;
1278 /* Construct the constraint connection matrix blbnb */
1279 for (k = at2con.index[a1-start]; k < at2con.index[a1-start+1]; k++)
1281 concon = at2con.a[k];
1284 if (nconnect >= ncc_alloc)
1286 ncc_alloc = over_alloc_small(nconnect+1);
1287 srenew(li->blbnb, ncc_alloc);
1289 li->blbnb[nconnect++] = concon;
1292 for (k = at2con.index[a2-start]; k < at2con.index[a2-start+1]; k++)
1294 concon = at2con.a[k];
1297 if (nconnect+1 > ncc_alloc)
1299 ncc_alloc = over_alloc_small(nconnect+1);
1300 srenew(li->blbnb, ncc_alloc);
1302 li->blbnb[nconnect++] = concon;
1305 li->blnr[con+1] = nconnect;
1309 /* Order the blbnb matrix to optimize memory access */
1310 qsort(&(li->blbnb[li->blnr[con]]), li->blnr[con+1]-li->blnr[con],
1311 sizeof(li->blbnb[0]), int_comp);
1313 /* Increase the constraint count */
1318 done_blocka(&at2con);
1320 /* This is the real number of constraints,
1321 * without dynamics the flexible constraints are not present.
1325 li->ncc = li->blnr[con];
1328 /* Since the matrix is static, we can free some memory */
1329 ncc_alloc = li->ncc;
1330 srenew(li->blbnb, ncc_alloc);
1333 if (ncc_alloc > li->ncc_alloc)
1335 li->ncc_alloc = ncc_alloc;
1336 srenew(li->blmf, li->ncc_alloc);
1337 srenew(li->blmf1, li->ncc_alloc);
1338 srenew(li->tmpncc, li->ncc_alloc);
1343 fprintf(debug, "Number of constraints is %d, couplings %d\n",
1350 li->th[0].b1 = li->nc;
1354 lincs_thread_setup(li, md->nr);
1357 set_lincs_matrix(li, md->invmass, md->lambda);
/* Prints a warning listing all constraints (bonds) that rotated more than
 * wangle degrees between the old positions x and the corrected positions
 * xprime (cosine of the rotation below cos(wangle)).  Aborts via gmx_fatal
 * when a bond length is not finite, and via too_many_constraint_warnings
 * when *warncount exceeds maxwarn. */
1360 static void lincs_warning(FILE *fplog,
1361 gmx_domdec_t *dd, rvec *x, rvec *xprime, t_pbc *pbc,
1362 int ncons, int *bla, real *bllen, real wangle,
1363 int maxwarn, int *warncount)
1367 real wfac, d0, d1, cosine;
1370 wfac = cos(DEG2RAD*wangle);
1372 sprintf(buf, "bonds that rotated more than %g degrees:\n"
1373 " atom 1 atom 2 angle previous, current, constraint length\n",
1375 fprintf(stderr, "%s", buf);
1378 fprintf(fplog, "%s", buf);
1381 for (b = 0; b < ncons; b++)
1387 pbc_dx_aiuc(pbc, x[i], x[j], v0);
1388 pbc_dx_aiuc(pbc, xprime[i], xprime[j], v1);
1392 rvec_sub(x[i], x[j], v0);
1393 rvec_sub(xprime[i], xprime[j], v1);
1397 cosine = iprod(v0, v1)/(d0*d1);
1400 sprintf(buf, " %6d %6d %5.1f %8.4f %8.4f %8.4f\n",
1401 ddglatnr(dd, i), ddglatnr(dd, j),
1402 RAD2DEG*acos(cosine), d0, d1, bllen[b]);
1403 fprintf(stderr, "%s", buf);
1406 fprintf(fplog, "%s", buf);
1408 if (!gmx_isfinite(d1))
1410 gmx_fatal(FARGS, "Bond length not finite.");
1416 if (*warncount > maxwarn)
1418 too_many_constraint_warnings(econtLINCS, *warncount);
/* Computes constraint deviation statistics over ncons constraints:
 * the local constraint count (ncons_loc), the sum of squared relative
 * deviations (ssd), and the maximum relative deviation (max) with its
 * constraint index (imax).  With domain decomposition, deviations are
 * weighted by nlocat (atoms counted half per domain, hence the 0.5). */
1422 static void cconerr(gmx_domdec_t *dd,
1423 int ncons, int *bla, real *bllen, rvec *x, t_pbc *pbc,
1424 real *ncons_loc, real *ssd, real *max, int *imax)
1426 real len, d, ma, ssd2, r2;
1427 int *nlocat, count, b, im;
1430 if (dd && dd->constraints)
1432 nlocat = dd_constraints_nlocalatoms(dd);
1443 for (b = 0; b < ncons; b++)
1447 pbc_dx_aiuc(pbc, x[bla[2*b]], x[bla[2*b+1]], dx);
1451 rvec_sub(x[bla[2*b]], x[bla[2*b+1]], dx);
1454 len = r2*gmx_invsqrt(r2);
1455 d = fabs(len/bllen[b]-1);
1456 if (d > ma && (nlocat == NULL || nlocat[b]))
1468 ssd2 += nlocat[b]*d*d;
1473 *ncons_loc = (nlocat ? 0.5 : 1)*count;
1474 *ssd = (nlocat ? 0.5 : 1)*ssd2;
/* Top-level LINCS entry point called from the constraint framework.
 * For econq == econqCoord: optionally refreshes the coupling matrix and
 * constraint lengths for the current FE lambda, handles flexible-constraint
 * lengths, runs do_lincs in an OpenMP parallel region, gathers before/after
 * deviation statistics (cconerr), stores RMSD accumulators, and prints
 * LINCS warnings when the max relative deviation or a bond rotation exceeds
 * the thresholds (bOK is FALSE when p_max >= 0.5).
 * For derivative/force projection it runs do_lincsp instead.
 * Afterwards it reduces dH/dlambda over the threads (correcting for the
 * dt^2 factor), reduces the per-thread virials, and updates flop counters.
 * Returns TRUE when constraining succeeded. */
1479 gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner,
1482 struct gmx_lincsdata *lincsd, t_mdatoms *md,
1484 rvec *x, rvec *xprime, rvec *min_proj,
1485 matrix box, t_pbc *pbc,
1486 real lambda, real *dvdlambda,
1487 real invdt, rvec *v,
1488 gmx_bool bCalcVir, tensor vir_r_m_dr,
1491 int maxwarn, int *warncount)
1494 char buf[STRLEN], buf2[22], buf3[STRLEN];
1495 int i, warn, p_imax, error;
1496 real ncons_loc, p_ssd, p_max = 0;
1502 /* This boolean should be set by a flag passed to this routine.
1503 * We can also easily check if any constraint length is changed,
1504 * if not dH/dlambda=0 and we can also set the boolean to FALSE.
1506 bCalcDHDL = (ir->efep != efepNO && dvdlambda != NULL);
1508 if (lincsd->nc == 0 && cr->dd == NULL)
1512 lincsd->rmsd_data[0] = 0;
1513 if (ir->eI == eiSD2 && v == NULL)
1521 lincsd->rmsd_data[i] = 0;
1527 if (econq == econqCoord)
1529 /* We can't use bCalcDHDL here, since NULL can be passed for dvdlambda
1530 * also with efep!=fepNO.
1532 if (ir->efep != efepNO)
1534 if (md->nMassPerturbed && lincsd->matlam != md->lambda)
1536 set_lincs_matrix(lincsd, md->invmass, md->lambda);
1539 for (i = 0; i < lincsd->nc; i++)
1541 lincsd->bllen[i] = lincsd->bllen0[i] + lambda*lincsd->ddist[i];
1545 if (lincsd->ncg_flex)
1547 /* Set the flexible constraint lengths to the old lengths */
1550 for (i = 0; i < lincsd->nc; i++)
1552 if (lincsd->bllen[i] == 0)
1554 pbc_dx_aiuc(pbc, x[lincsd->bla[2*i]], x[lincsd->bla[2*i+1]], dx);
1555 lincsd->bllen[i] = norm(dx);
1561 for (i = 0; i < lincsd->nc; i++)
1563 if (lincsd->bllen[i] == 0)
1566 sqrt(distance2(x[lincsd->bla[2*i]],
1567 x[lincsd->bla[2*i+1]]));
1575 cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1576 &ncons_loc, &p_ssd, &p_max, &p_imax);
1579 /* This warn var can be updated by multiple threads
1580 * at the same time. But as we only need to detect
1581 * if a warning occured or not, this is not an issue.
1585 /* The OpenMP parallel region of constrain_lincs for coords */
1586 #pragma omp parallel num_threads(lincsd->nth)
1588 int th = gmx_omp_get_thread_num();
1590 clear_mat(lincsd->th[th].vir_r_m_dr);
1592 do_lincs(x, xprime, box, pbc, lincsd, th,
1595 ir->LincsWarnAngle, &warn,
1597 th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1600 if (bLog && fplog && lincsd->nc > 0)
1602 fprintf(fplog, " Rel. Constraint Deviation: RMS MAX between atoms\n");
1603 fprintf(fplog, " Before LINCS %.6f %.6f %6d %6d\n",
1604 sqrt(p_ssd/ncons_loc), p_max,
1605 ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1606 ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1610 cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1611 &ncons_loc, &p_ssd, &p_max, &p_imax);
1612 /* Check if we are doing the second part of SD */
1613 if (ir->eI == eiSD2 && v == NULL)
1621 lincsd->rmsd_data[0] = ncons_loc;
1622 lincsd->rmsd_data[i] = p_ssd;
1626 lincsd->rmsd_data[0] = 0;
1627 lincsd->rmsd_data[1] = 0;
1628 lincsd->rmsd_data[2] = 0;
1630 if (bLog && fplog && lincsd->nc > 0)
1633 " After LINCS %.6f %.6f %6d %6d\n\n",
1634 sqrt(p_ssd/ncons_loc), p_max,
1635 ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1636 ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1643 cconerr(cr->dd, lincsd->nc, lincsd->bla, lincsd->bllen, xprime, pbc,
1644 &ncons_loc, &p_ssd, &p_max, &p_imax);
1647 sprintf(buf3, " in simulation %d", cr->ms->sim);
1653 sprintf(buf, "\nStep %s, time %g (ps) LINCS WARNING%s\n"
1654 "relative constraint deviation after LINCS:\n"
1655 "rms %.6f, max %.6f (between atoms %d and %d)\n",
1656 gmx_step_str(step, buf2), ir->init_t+step*ir->delta_t,
1658 sqrt(p_ssd/ncons_loc), p_max,
1659 ddglatnr(cr->dd, lincsd->bla[2*p_imax]),
1660 ddglatnr(cr->dd, lincsd->bla[2*p_imax+1]));
1663 fprintf(fplog, "%s", buf);
1665 fprintf(stderr, "%s", buf);
1666 lincs_warning(fplog, cr->dd, x, xprime, pbc,
1667 lincsd->nc, lincsd->bla, lincsd->bllen,
1668 ir->LincsWarnAngle, maxwarn, warncount);
1670 bOK = (p_max < 0.5);
1673 if (lincsd->ncg_flex)
1675 for (i = 0; (i < lincsd->nc); i++)
1677 if (lincsd->bllen0[i] == 0 && lincsd->ddist[i] == 0)
1679 lincsd->bllen[i] = 0;
1686 /* The OpenMP parallel region of constrain_lincs for derivatives */
1687 #pragma omp parallel num_threads(lincsd->nth)
1689 int th = gmx_omp_get_thread_num();
1691 do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
1692 md->invmass, econq, bCalcDHDL,
1693 bCalcVir, th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
1699 /* Reduce the dH/dlambda contributions over the threads */
1704 for (th = 0; th < lincsd->nth; th++)
1706 dhdlambda += lincsd->th[th].dhdlambda;
1710 /* dhdlambda contains dH/dlambda*dt^2, correct for this */
1711 dhdlambda /= ir->delta_t*ir->delta_t;
1713 *dvdlambda += dhdlambda;
1716 if (bCalcVir && lincsd->nth > 1)
1718 for (i = 1; i < lincsd->nth; i++)
1720 m_add(vir_r_m_dr, lincsd->th[i].vir_r_m_dr, vir_r_m_dr);
1724 /* count assuming nit=1 */
1725 inc_nrnb(nrnb, eNR_LINCS, lincsd->nc);
1726 inc_nrnb(nrnb, eNR_LINCSMAT, (2+lincsd->nOrder)*lincsd->ncc);
1727 if (lincsd->ntriangle > 0)
1729 inc_nrnb(nrnb, eNR_LINCSMAT, lincsd->nOrder*lincsd->ncc_triangle);
1733 inc_nrnb(nrnb, eNR_CONSTR_V, lincsd->nc*2);
1737 inc_nrnb(nrnb, eNR_CONSTR_VIR, lincsd->nc);