2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/legacyheaders/typedefs.h"
44 #include "gromacs/legacyheaders/types/commrec.h"
45 #include "gromacs/legacyheaders/vsite.h"
46 #include "gromacs/legacyheaders/macros.h"
47 #include "gromacs/legacyheaders/nrnb.h"
48 #include "gromacs/math/vec.h"
49 #include "gromacs/legacyheaders/network.h"
50 #include "gromacs/legacyheaders/domdec.h"
51 #include "gromacs/topology/mtop_util.h"
52 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
54 #include "gromacs/pbcutil/ishift.h"
55 #include "gromacs/pbcutil/mshift.h"
56 #include "gromacs/pbcutil/pbc.h"
57 #include "gromacs/utility/gmxomp.h"
58 #include "gromacs/utility/smalloc.h"
60 /* Routines to send/receive coordinates and force
61 * of constructing atoms.
/* Compute the periodic-image-corrected difference vector dx between
 * positions xi and xj, returning the shift index from pbc_dx_aiuc.
 * NOTE(review): interior lines of this function are missing from this
 * view; presumably there is a pbc == NULL fallback that does a plain
 * rvec_sub and returns CENTRAL — confirm against the full file.
 */
64 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
/* Delegate to pbc_dx_aiuc, which writes dx and returns the shift index. */
68 return pbc_dx_aiuc(pbc, xi, xj, dx);
77 /* Vsite construction routines */
/* Construct a 2-atom virtual site: x is placed on the line through xi
 * and xj at fraction a of the i->j vector.
 * NOTE(review): the branch structure (PBC vs. no-PBC) and the
 * declaration/definition of dx and b are missing from this view;
 * presumably b = 1 - a — confirm against the full file.
 */
79 static void constr_vsite2(rvec xi, rvec xj, rvec x, real a, t_pbc *pbc)
/* PBC path: use the minimum-image i->j vector. */
89 pbc_dx_aiuc(pbc, xj, xi, dx);
90 x[XX] = xi[XX] + a*dx[XX];
91 x[YY] = xi[YY] + a*dx[YY];
92 x[ZZ] = xi[ZZ] + a*dx[ZZ];
/* Non-PBC path: direct linear combination of the two positions. */
96 x[XX] = b*xi[XX] + a*xj[XX];
97 x[YY] = b*xi[YY] + a*xj[YY];
98 x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
102 /* TOTAL: 10 flops */
/* Construct a 3-atom virtual site as a linear combination of xi, xj, xk
 * with weights a (on j) and b (on k).
 * NOTE(review): branch structure and declarations of dxj, dxk and c are
 * missing from this view; presumably c = 1 - a - b — confirm.
 */
105 static void constr_vsite3(rvec xi, rvec xj, rvec xk, rvec x, real a, real b,
/* PBC path: minimum-image vectors from i to j and from i to k. */
116 pbc_dx_aiuc(pbc, xj, xi, dxj);
117 pbc_dx_aiuc(pbc, xk, xi, dxk);
118 x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
119 x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
120 x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
/* Non-PBC path: direct linear combination of the three positions. */
124 x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
125 x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
126 x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
130 /* TOTAL: 17 flops */
/* Construct a 3-atom virtual site at a fixed distance b from xi, along
 * the direction from i to the point on line j-k parameterized by a.
 * NOTE(review): the parameter list continuation and local declarations
 * (xij, xjk, temp, c) are missing from this view.
 */
133 static void constr_vsite3FD(rvec xi, rvec xj, rvec xk, rvec x, real a, real b,
139 pbc_rvec_sub(pbc, xj, xi, xij);
140 pbc_rvec_sub(pbc, xk, xj, xjk);
143 /* temp goes from i to a point on the line jk */
144 temp[XX] = xij[XX] + a*xjk[XX];
145 temp[YY] = xij[YY] + a*xjk[YY];
146 temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
/* Scale temp to length b: c = b / |temp|. */
149 c = b*gmx_invsqrt(iprod(temp, temp));
152 x[XX] = xi[XX] + c*temp[XX];
153 x[YY] = xi[YY] + c*temp[YY];
154 x[ZZ] = xi[ZZ] + c*temp[ZZ];
157 /* TOTAL: 34 flops */
/* Construct a 3-atom virtual site at fixed angle and distance:
 * a1 scales the ij direction, b1 scales xp, the component of xjk
 * perpendicular to xij (Gram-Schmidt).
 * NOTE(review): declarations of xij, xjk, xp and the assignment of a1
 * are missing from this view; presumably a1 = a * invdij — confirm.
 */
160 static void constr_vsite3FAD(rvec xi, rvec xj, rvec xk, rvec x, real a, real b, t_pbc *pbc)
163 real a1, b1, c1, invdij;
165 pbc_rvec_sub(pbc, xj, xi, xij);
166 pbc_rvec_sub(pbc, xk, xj, xjk);
/* invdij = 1/|xij|; c1 = (xij . xjk)/|xij|^2 projects xjk onto xij. */
169 invdij = gmx_invsqrt(iprod(xij, xij));
170 c1 = invdij * invdij * iprod(xij, xjk);
/* xp = xjk minus its projection on xij: in-plane, perpendicular to xij. */
171 xp[XX] = xjk[XX] - c1*xij[XX];
172 xp[YY] = xjk[YY] - c1*xij[YY];
173 xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
175 b1 = b*gmx_invsqrt(iprod(xp, xp));
178 x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
179 x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
180 x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
183 /* TOTAL: 63 flops */
/* Construct a 3-atom out-of-plane virtual site: linear combination of
 * xij and xik plus c times their cross product (out-of-plane component).
 * NOTE(review): declarations of xij, xik, temp are missing from view.
 */
186 static void constr_vsite3OUT(rvec xi, rvec xj, rvec xk, rvec x,
187 real a, real b, real c, t_pbc *pbc)
191 pbc_rvec_sub(pbc, xj, xi, xij);
192 pbc_rvec_sub(pbc, xk, xi, xik);
/* temp = xij x xik gives the out-of-plane direction. */
193 cprod(xij, xik, temp);
196 x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
197 x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
198 x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
201 /* TOTAL: 33 flops */
/* Construct a 4-atom virtual site at a fixed distance c from xi, along
 * the direction from i to the point on plane j-k-l parameterized by
 * a (along jk) and b (along jl).
 */
204 static void constr_vsite4FD(rvec xi, rvec xj, rvec xk, rvec xl, rvec x,
205 real a, real b, real c, t_pbc *pbc)
207 rvec xij, xjk, xjl, temp;
210 pbc_rvec_sub(pbc, xj, xi, xij);
211 pbc_rvec_sub(pbc, xk, xj, xjk);
212 pbc_rvec_sub(pbc, xl, xj, xjl);
215 /* temp goes from i to a point on the plane jkl */
216 temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
217 temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
218 temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
/* Scale temp to length c: d = c / |temp|. */
221 d = c*gmx_invsqrt(iprod(temp, temp));
224 x[XX] = xi[XX] + d*temp[XX];
225 x[YY] = xi[YY] + d*temp[YY];
226 x[ZZ] = xi[ZZ] + d*temp[ZZ];
229 /* TOTAL: 43 flops */
/* Construct a 4-atom virtual site (4FDN variant) at distance c from xi
 * along rm, the normal of the plane spanned by rja and rjb.
 * NOTE(review): the construction of ra and rb from a, b, xik, xil and
 * the cprod(rja, rjb, rm) line are missing from this view — confirm
 * against the full file.
 */
233 static void constr_vsite4FDN(rvec xi, rvec xj, rvec xk, rvec xl, rvec x,
234 real a, real b, real c, t_pbc *pbc)
236 rvec xij, xik, xil, ra, rb, rja, rjb, rm;
239 pbc_rvec_sub(pbc, xj, xi, xij);
240 pbc_rvec_sub(pbc, xk, xi, xik);
241 pbc_rvec_sub(pbc, xl, xi, xil);
/* rja, rjb: vectors from j to the scaled points ra, rb. */
254 rvec_sub(ra, xij, rja);
255 rvec_sub(rb, xij, rjb);
/* Scale rm to length c: d = c / |rm|. */
261 d = c*gmx_invsqrt(norm2(rm));
264 x[XX] = xi[XX] + d*rm[XX];
265 x[YY] = xi[YY] + d*rm[YY];
266 x[ZZ] = xi[ZZ] + d*rm[ZZ];
269 /* TOTAL: 47 flops */
/* Construct an N-atom virtual site: weighted average of n constructing
 * atoms, accumulated as displacements dsum relative to the first atom x1.
 * Returns the increment for the caller's iatom index (presumably 1 + n3;
 * the return statement is missing from this view — confirm).
 */
273 static int constr_vsiten(t_iatom *ia, t_iparams ip[],
/* n3 = 3 * number of constructing atoms; iatoms come in triplets. */
281 n3 = 3*ip[ia[0]].vsiten.n;
/* x1 is the position of the first constructing atom, the reference. */
284 copy_rvec(x[ai], x1);
286 for (i = 3; i < n3; i += 3)
/* a is this constructing atom's weight. */
289 a = ip[ia[i]].vsiten.a;
/* With PBC use the minimum-image displacement from x1, else plain sub. */
292 pbc_dx_aiuc(pbc, x[ai], x1, dx);
296 rvec_sub(x[ai], x1, dx);
298 dsum[XX] += a*dx[XX];
299 dsum[YY] += a*dx[YY];
300 dsum[ZZ] += a*dx[ZZ];
/* Final vsite position: reference atom plus the weighted displacement sum. */
304 x[av][XX] = x1[XX] + dsum[XX];
305 x[av][YY] = x1[YY] + dsum[YY];
306 x[av][ZZ] = x1[ZZ] + dsum[ZZ];
/* Construct all virtual sites in ilist for one thread: loop over all
 * vsite function types, dispatch to the per-type constr_vsite* routine,
 * optionally match the vsite's periodic image to its charge group, and
 * (when v != NULL, presumably) estimate the vsite velocity by finite
 * difference of old and new positions.
 * NOTE(review): many interior lines (switch labels, atom-index reads,
 * brace structure) are missing from this view.
 */
312 void construct_vsites_thread(gmx_vsite_t *vsite,
315 t_iparams ip[], t_ilist ilist[],
319 rvec xpbc, xv, vv, dx;
320 real a1, b1, c1, inv_dt;
321 int i, inc, ii, nra, nr, tp, ftype;
322 t_iatom avsite, ai, aj, ak, al, pbc_atom;
325 int *vsite_pbc, ishift;
326 rvec reftmp, vtmp, rtmp;
/* Without charge groups every vsite can use full PBC directly. */
337 bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
341 for (ftype = 0; (ftype < F_NRE); ftype++)
343 if ((interaction_function[ftype].flags & IF_VSITE) &&
346 nra = interaction_function[ftype].nratoms;
348 nr = ilist[ftype].nr;
349 ia = ilist[ftype].iatoms;
353 pbc_null2 = pbc_null;
355 else if (pbc_null != NULL)
/* Per-vsite PBC reference atoms for this function type. */
357 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
360 for (i = 0; i < nr; )
364 /* The vsite and constructing atoms */
368 /* Constants for constructing vsites */
370 /* Check what kind of pbc we need to use */
373 /* No charge groups, vsite follows its own pbc */
375 copy_rvec(x[avsite], xpbc);
377 else if (vsite_pbc != NULL)
/* vsite_pbc is indexed per interaction entry (stride 1+nra). */
379 pbc_atom = vsite_pbc[i/(1+nra)];
384 /* We need to copy the coordinates here,
385 * single for single atom cg's pbc_atom
386 * is the vsite itself.
388 copy_rvec(x[pbc_atom], xpbc);
390 pbc_null2 = pbc_null;
401 /* Copy the old position */
402 copy_rvec(x[avsite], xv);
404 /* Construct the vsite depending on type */
409 constr_vsite2(x[ai], x[aj], x[avsite], a1, pbc_null2);
415 constr_vsite3(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
421 constr_vsite3FD(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
427 constr_vsite3FAD(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
434 constr_vsite3OUT(x[ai], x[aj], x[ak], x[avsite], a1, b1, c1, pbc_null2);
442 constr_vsite4FD(x[ai], x[aj], x[ak], x[al], x[avsite], a1, b1, c1,
451 constr_vsite4FDN(x[ai], x[aj], x[ak], x[al], x[avsite], a1, b1, c1,
/* VSITEN consumes a variable number of iatoms; inc is returned. */
455 inc = constr_vsiten(ia, ip, x, pbc_null2);
458 gmx_fatal(FARGS, "No such vsite type %d in %s, line %d",
459 ftype, __FILE__, __LINE__);
464 /* Match the pbc of this vsite to the rest of its charge group */
465 ishift = pbc_dx_aiuc(pbc_null, x[avsite], xpbc, dx);
466 if (ishift != CENTRAL)
/* Shift the constructed position into the charge group's image. */
468 rvec_add(xpbc, dx, x[avsite]);
473 /* Calculate velocity of vsite... */
474 rvec_sub(x[avsite], xv, vv);
475 svmul(inv_dt, vv, v[avsite]);
478 /* Increment loop variables */
/* Top-level virtual-site construction: set up PBC when inter-cg vsites
 * require it, communicate constructing-atom coordinates under domain
 * decomposition, then run construct_vsites_thread either serially or
 * in an OpenMP parallel region with a final pass for vsites that depend
 * on other vsites.
 * NOTE(review): several argument lists and the else/brace structure are
 * missing from this view.
 */
486 void construct_vsites(gmx_vsite_t *vsite,
489 t_iparams ip[], t_ilist ilist[],
490 int ePBC, gmx_bool bMolPBC,
491 t_commrec *cr, matrix box)
493 t_pbc pbc, *pbc_null;
497 bDomDec = cr && DOMAINDECOMP(cr);
499 /* We only need to do pbc when we have inter-cg vsites */
500 if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite)
502 /* This is wasting some CPU time as we now do this multiple times
503 * per MD step. But how often do we have vsites with full pbc?
505 pbc_null = set_pbc_dd(&pbc, ePBC, cr != NULL ? cr->dd : NULL, FALSE, box);
/* DD: fetch non-local constructing-atom coordinates first. */
514 dd_move_x_vsites(cr->dd, box, x);
517 if (vsite->nthreads == 1)
519 construct_vsites_thread(vsite,
526 #pragma omp parallel num_threads(vsite->nthreads)
/* Each thread constructs its own pre-partitioned ilist. */
528 construct_vsites_thread(vsite,
530 ip, vsite->tdata[gmx_omp_get_thread_num()].ilist,
533 /* Now we can construct the vsites that might depend on other vsites */
534 construct_vsites_thread(vsite,
536 ip, vsite->tdata[vsite->nthreads].ilist,
/* Spread the force on a 2-atom virtual site back onto its constructing
 * atoms: fi = (1-a)*f[av] to atom i, fj = a*f[av] to atom j. Updates
 * fshift for the virial when shift vectors are involved.
 * NOTE(review): atom-index reads, the f[] accumulation lines and the
 * graph/PBC branch structure are missing from this view.
 */
541 static void spread_vsite2(t_iatom ia[], real a,
542 rvec x[], rvec f[], rvec fshift[],
543 t_pbc *pbc, t_graph *g)
/* Linear weights: atom i gets (1-a), atom j gets a of the vsite force. */
555 svmul(1-a, f[av], fi);
556 svmul( a, f[av], fj);
/* Graph path: shift indices from the molecule graph. */
565 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, av), di);
567 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), di);
/* PBC path: shift indices from minimum-image displacements. */
572 siv = pbc_dx_aiuc(pbc, x[ai], x[av], dx);
573 sij = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
581 if (fshift && (siv != CENTRAL || sij != CENTRAL))
583 rvec_inc(fshift[siv], f[av]);
584 rvec_dec(fshift[CENTRAL], fi);
585 rvec_dec(fshift[sij], fj);
588 /* TOTAL: 13 flops */
/* Construct all virtual sites for a whole (global) topology: iterate
 * over molecule blocks and molecules, calling construct_vsites with an
 * atom offset (as) per molecule and no PBC/velocity/DD handling.
 */
591 void construct_vsites_mtop(gmx_vsite_t *vsite,
592 gmx_mtop_t *mtop, rvec x[])
595 gmx_molblock_t *molb;
599 for (mb = 0; mb < mtop->nmolblock; mb++)
601 molb = &mtop->molblock[mb];
602 molt = &mtop->moltype[molb->type];
603 for (mol = 0; mol < molb->nmol; mol++)
/* dt=0, v=NULL: no velocities; epbcNONE with NULL cr/box. */
605 construct_vsites(vsite, x+as, 0.0, NULL,
606 mtop->ffparams.iparams, molt->ilist,
607 epbcNONE, TRUE, NULL, NULL);
/* Advance the atom offset past this molecule. */
608 as += molt->atoms.nr;
/* Spread the force on a 3-atom virtual site onto its constructing
 * atoms with linear weights: (1-a-b) to i, a to j, b to k. Updates
 * fshift for the virial.
 * NOTE(review): atom-index reads and the f[] accumulation lines are
 * missing from this view.
 */
613 static void spread_vsite3(t_iatom ia[], real a, real b,
614 rvec x[], rvec f[], rvec fshift[],
615 t_pbc *pbc, t_graph *g)
618 atom_id av, ai, aj, ak;
627 svmul(1-a-b, f[av], fi);
628 svmul( a, f[av], fj);
629 svmul( b, f[av], fk);
/* Graph path: shift indices from the molecule graph. */
639 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ia[1]), di);
641 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), di);
643 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), di);
/* PBC path: shift indices from minimum-image displacements. */
648 siv = pbc_dx_aiuc(pbc, x[ai], x[av], dx);
649 sij = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
650 sik = pbc_dx_aiuc(pbc, x[ai], x[ak], dx);
659 if (fshift && (siv != CENTRAL || sij != CENTRAL || sik != CENTRAL))
661 rvec_inc(fshift[siv], f[av]);
662 rvec_dec(fshift[CENTRAL], fi);
663 rvec_dec(fshift[sij], fj);
664 rvec_dec(fshift[sik], fk);
667 /* TOTAL: 20 flops */
/* Spread the force on a 3FD (fixed-distance) virtual site. Because the
 * construction is non-linear (normalization), the spread force is the
 * vsite force minus its projection along xix, scaled by c; atom i gets
 * the remainder so total force is conserved. Optionally accumulates the
 * non-linearity correction to the virial in dxdf.
 * NOTE(review): atom-index reads, the computation of c and a1, and the
 * brace structure are missing from this view.
 */
670 static void spread_vsite3FD(t_iatom ia[], real a, real b,
671 rvec x[], rvec f[], rvec fshift[],
672 gmx_bool VirCorr, matrix dxdf,
673 t_pbc *pbc, t_graph *g)
675 real fx, fy, fz, c, invl, fproj, a1;
676 rvec xvi, xij, xjk, xix, fv, temp;
677 t_iatom av, ai, aj, ak;
678 int svi, sji, skj, d;
685 copy_rvec(f[av], fv);
687 sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
688 skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
691 /* xix goes from i to point x on the line jk */
692 xix[XX] = xij[XX]+a*xjk[XX];
693 xix[YY] = xij[YY]+a*xjk[YY];
694 xix[ZZ] = xij[ZZ]+a*xjk[ZZ];
697 invl = gmx_invsqrt(iprod(xix, xix));
701 fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
/* temp: the force component perpendicular to xix, scaled by c. */
703 temp[XX] = c*(fv[XX]-fproj*xix[XX]);
704 temp[YY] = c*(fv[YY]-fproj*xix[YY]);
705 temp[ZZ] = c*(fv[ZZ]-fproj*xix[ZZ]);
708 /* c is already calculated in constr_vsite3FD
709 storing c somewhere will save 26 flops! */
/* Distribute: i gets fv minus temp, j and k share temp by a1 and a. */
712 f[ai][XX] += fv[XX] - temp[XX];
713 f[ai][YY] += fv[YY] - temp[YY];
714 f[ai][ZZ] += fv[ZZ] - temp[ZZ];
715 f[aj][XX] += a1*temp[XX];
716 f[aj][YY] += a1*temp[YY];
717 f[aj][ZZ] += a1*temp[ZZ];
718 f[ak][XX] += a*temp[XX];
719 f[ak][YY] += a*temp[YY];
720 f[ak][ZZ] += a*temp[ZZ];
/* Graph path: shift indices from the molecule graph. */
725 ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
727 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
729 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
734 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
741 if (fshift && (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL))
743 rvec_dec(fshift[svi], fv);
744 fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
745 fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
746 fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
747 fshift[ sji][XX] += temp[XX];
748 fshift[ sji][YY] += temp[YY];
749 fshift[ sji][ZZ] += temp[ZZ];
750 fshift[ skj][XX] += a*temp[XX];
751 fshift[ skj][YY] += a*temp[YY];
752 fshift[ skj][ZZ] += a*temp[ZZ];
757 /* When VirCorr=TRUE, the virial for the current forces is not
758 * calculated from the redistributed forces. This means that
759 * the effect of non-linear virtual site constructions on the virial
760 * needs to be added separately. This contribution can be calculated
761 * in many ways, but the simplest and cheapest way is to use
762 * the first constructing atom ai as a reference position in space:
763 * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
768 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
770 for (i = 0; i < DIM; i++)
772 for (j = 0; j < DIM; j++)
774 /* As xix is a linear combination of j and k, use that here */
775 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
780 /* TOTAL: 61 flops */
/* Spread the force on a 3FAD (fixed angle and distance) virtual site.
 * Decomposes the vsite force into components parallel to xij (Fpij),
 * parallel to xperp (Fppp) and the remainder, then distributes them over
 * atoms i, j, k so that total force is conserved. Optionally accumulates
 * the non-linearity virial correction in dxdf.
 * NOTE(review): atom-index reads, the a1/b1/c2 assignments, the f3 loop
 * body and the f[ak] accumulation are missing from this view.
 */
783 static void spread_vsite3FAD(t_iatom ia[], real a, real b,
784 rvec x[], rvec f[], rvec fshift[],
785 gmx_bool VirCorr, matrix dxdf,
786 t_pbc *pbc, t_graph *g)
788 rvec xvi, xij, xjk, xperp, Fpij, Fppp, fv, f1, f2, f3;
789 real a1, b1, c1, c2, invdij, invdij2, invdp, fproj;
790 t_iatom av, ai, aj, ak;
791 int svi, sji, skj, d;
798 copy_rvec(f[ia[1]], fv);
800 sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
801 skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
/* Rebuild the same geometry as constr_vsite3FAD (xperp perp. to xij). */
804 invdij = gmx_invsqrt(iprod(xij, xij));
805 invdij2 = invdij * invdij;
806 c1 = iprod(xij, xjk) * invdij2;
807 xperp[XX] = xjk[XX] - c1*xij[XX];
808 xperp[YY] = xjk[YY] - c1*xij[YY];
809 xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
810 /* xperp in plane ijk, perp. to ij */
811 invdp = gmx_invsqrt(iprod(xperp, xperp));
816 /* a1, b1 and c1 are already calculated in constr_vsite3FAD
817 storing them somewhere will save 45 flops! */
/* Project the vsite force on xij and on xperp. */
819 fproj = iprod(xij, fv)*invdij2;
820 svmul(fproj, xij, Fpij); /* proj. f on xij */
821 svmul(iprod(xperp, fv)*invdp*invdp, xperp, Fppp); /* proj. f on xperp */
822 svmul(b1*fproj, xperp, f3);
825 rvec_sub(fv, Fpij, f1); /* f1 = f - Fpij */
826 rvec_sub(f1, Fppp, f2); /* f2 = f - Fpij - Fppp */
827 for (d = 0; (d < DIM); d++)
/* Distribute the decomposed force so the total is conserved. */
835 f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
836 f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
837 f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
838 f[aj][XX] += f1[XX] - c2*f2[XX] - f3[XX];
839 f[aj][YY] += f1[YY] - c2*f2[YY] - f3[YY];
840 f[aj][ZZ] += f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
/* Graph path: shift indices from the molecule graph. */
848 ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
850 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
852 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
857 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
864 if (fshift && (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL))
866 rvec_dec(fshift[svi], fv);
867 fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
868 fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
869 fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
870 fshift[ sji][XX] += f1[XX] - c1 *f2[XX] - f3[XX];
871 fshift[ sji][YY] += f1[YY] - c1 *f2[YY] - f3[YY];
872 fshift[ sji][ZZ] += f1[ZZ] - c1 *f2[ZZ] - f3[ZZ];
873 fshift[ skj][XX] += f2[XX];
874 fshift[ skj][YY] += f2[YY];
875 fshift[ skj][ZZ] += f2[ZZ];
/* Virial correction for the non-linear construction (see 3FD variant). */
883 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
885 for (i = 0; i < DIM; i++)
887 for (j = 0; j < DIM; j++)
889 /* Note that xik=xij+xjk, so we have to add xij*f2 */
892 + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
898 /* TOTAL: 113 flops */
/* Spread the force on a 3OUT (out-of-plane) virtual site. The cross
 * product in the construction makes the spread include cross terms of
 * the force (cfx/cfy/cfz, presumably c*fv components — their assignment
 * is missing from this view). Atom i receives the remainder so total
 * force is conserved; dxdf gets the virial correction when VirCorr.
 */
901 static void spread_vsite3OUT(t_iatom ia[], real a, real b, real c,
902 rvec x[], rvec f[], rvec fshift[],
903 gmx_bool VirCorr, matrix dxdf,
904 t_pbc *pbc, t_graph *g)
906 rvec xvi, xij, xik, fv, fj, fk;
908 atom_id av, ai, aj, ak;
917 sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
918 ski = pbc_rvec_sub(pbc, x[ak], x[ai], xik);
921 copy_rvec(f[av], fv);
/* fj/fk: linear part (a, b) plus cross-product derivative terms. */
928 fj[XX] = a*fv[XX] - xik[ZZ]*cfy + xik[YY]*cfz;
929 fj[YY] = xik[ZZ]*cfx + a*fv[YY] - xik[XX]*cfz;
930 fj[ZZ] = -xik[YY]*cfx + xik[XX]*cfy + a*fv[ZZ];
932 fk[XX] = b*fv[XX] + xij[ZZ]*cfy - xij[YY]*cfz;
933 fk[YY] = -xij[ZZ]*cfx + b*fv[YY] + xij[XX]*cfz;
934 fk[ZZ] = xij[YY]*cfx - xij[XX]*cfy + b*fv[ZZ];
/* Atom i takes whatever is left of fv, conserving total force. */
937 f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
938 f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
939 f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
/* Graph path: shift indices from the molecule graph. */
946 ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
948 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
950 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, ai), di);
955 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
962 if (fshift && (svi != CENTRAL || sji != CENTRAL || ski != CENTRAL))
964 rvec_dec(fshift[svi], fv);
965 fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
966 fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
967 fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
968 rvec_inc(fshift[sji], fj);
969 rvec_inc(fshift[ski], fk);
/* Virial correction for the non-linear construction. */
977 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
979 for (i = 0; i < DIM; i++)
981 for (j = 0; j < DIM; j++)
983 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
988 /* TOTAL: 54 flops */
/* Spread the force on a 4FD (fixed-distance, plane) virtual site:
 * analogous to spread_vsite3FD but the reference point lies on the
 * plane j-k-l, so the perpendicular force component temp is shared over
 * j, k, l with weights a1, a, b.
 * NOTE(review): atom-index reads and the d/a1 assignments are missing
 * from this view.
 */
991 static void spread_vsite4FD(t_iatom ia[], real a, real b, real c,
992 rvec x[], rvec f[], rvec fshift[],
993 gmx_bool VirCorr, matrix dxdf,
994 t_pbc *pbc, t_graph *g)
996 real d, invl, fproj, a1;
997 rvec xvi, xij, xjk, xjl, xix, fv, temp;
998 atom_id av, ai, aj, ak, al;
1000 int svi, sji, skj, slj, m;
1008 sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
1009 skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
1010 slj = pbc_rvec_sub(pbc, x[al], x[aj], xjl);
1013 /* xix goes from i to point x on the plane jkl */
1014 for (m = 0; m < DIM; m++)
1016 xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
1020 invl = gmx_invsqrt(iprod(xix, xix));
1022 /* 4 + ?10? flops */
1024 copy_rvec(f[av], fv);
1026 fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
/* temp: perpendicular force component scaled by d. */
1028 for (m = 0; m < DIM; m++)
1030 temp[m] = d*(fv[m] - fproj*xix[m]);
1034 /* c is already calculated in constr_vsite3FD
1035 storing c somewhere will save 35 flops! */
1038 for (m = 0; m < DIM; m++)
1040 f[ai][m] += fv[m] - temp[m];
1041 f[aj][m] += a1*temp[m];
1042 f[ak][m] += a*temp[m];
1043 f[al][m] += b*temp[m];
/* Graph path: shift indices from the molecule graph. */
1049 ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
1051 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
1053 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
1055 ivec_sub(SHIFT_IVEC(g, al), SHIFT_IVEC(g, aj), di);
1060 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
1068 (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL || slj != CENTRAL))
1070 rvec_dec(fshift[svi], fv);
1071 for (m = 0; m < DIM; m++)
1073 fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
1074 fshift[ sji][m] += temp[m];
1075 fshift[ skj][m] += a*temp[m];
1076 fshift[ slj][m] += b*temp[m];
/* Virial correction for the non-linear construction. */
1085 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
1087 for (i = 0; i < DIM; i++)
1089 for (j = 0; j < DIM; j++)
1091 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
1096 /* TOTAL: 77 flops */
/* Spread the force on a 4FDN virtual site (normal of plane rja x rjb).
 * The derivative of the normalized cross product produces the large
 * fj/fk/fl expressions below; atom i takes the remainder so total force
 * is conserved. dxdf gets the virial correction when VirCorr.
 * NOTE(review): the construction of ra/rb, rt and several assignments
 * (denom usage, cross product into rm on line 1142 is present) are only
 * partially visible here.
 */
1100 static void spread_vsite4FDN(t_iatom ia[], real a, real b, real c,
1101 rvec x[], rvec f[], rvec fshift[],
1102 gmx_bool VirCorr, matrix dxdf,
1103 t_pbc *pbc, t_graph *g)
1105 rvec xvi, xij, xik, xil, ra, rb, rja, rjb, rab, rm, rt;
1106 rvec fv, fj, fk, fl;
1110 int av, ai, aj, ak, al;
1111 int svi, sij, sik, sil;
1113 /* DEBUG: check atom indices */
1120 copy_rvec(f[av], fv);
1122 sij = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
1123 sik = pbc_rvec_sub(pbc, x[ak], x[ai], xik);
1124 sil = pbc_rvec_sub(pbc, x[al], x[ai], xil);
/* Rebuild the same geometry as constr_vsite4FDN. */
1137 rvec_sub(ra, xij, rja);
1138 rvec_sub(rb, xij, rjb);
1139 rvec_sub(rb, ra, rab);
1142 cprod(rja, rjb, rm);
1145 invrm = gmx_invsqrt(norm2(rm));
1146 denom = invrm*invrm;
/* Pre-scaled force components along the normal direction. */
1149 cfx = c*invrm*fv[XX];
1150 cfy = c*invrm*fv[YY];
1151 cfz = c*invrm*fv[ZZ];
1162 fj[XX] = ( -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
1163 fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
1164 fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1175 fk[XX] = ( -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
1176 fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
1177 fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1188 fl[XX] = ( -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
1189 fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
1190 fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
/* Atom i takes the remainder: total spread force equals fv. */
1193 f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1194 f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1195 f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1196 rvec_inc(f[aj], fj);
1197 rvec_inc(f[ak], fk);
1198 rvec_inc(f[al], fl);
/* Graph path: shift indices from the molecule graph. */
1203 ivec_sub(SHIFT_IVEC(g, av), SHIFT_IVEC(g, ai), di);
1205 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
1207 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, ai), di);
1209 ivec_sub(SHIFT_IVEC(g, al), SHIFT_IVEC(g, ai), di);
1214 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
1221 if (fshift && (svi != CENTRAL || sij != CENTRAL || sik != CENTRAL || sil != CENTRAL))
1223 rvec_dec(fshift[svi], fv);
1224 fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1225 fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1226 fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1227 rvec_inc(fshift[sij], fj);
1228 rvec_inc(fshift[sik], fk);
1229 rvec_inc(fshift[sil], fl);
/* Virial correction for the non-linear construction. */
1237 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
1239 for (i = 0; i < DIM; i++)
1241 for (j = 0; j < DIM; j++)
1243 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
1248 /* Total: 207 flops (Yuck!) */
/* Spread the force on an N-atom virtual site: each constructing atom
 * receives its weight a times the vsite force. Returns the iatom-index
 * increment (the return statement is missing from this view; presumably
 * 1 + n3 — confirm).
 */
1252 static int spread_vsiten(t_iatom ia[], t_iparams ip[],
1253 rvec x[], rvec f[], rvec fshift[],
1254 t_pbc *pbc, t_graph *g)
1262 n3 = 3*ip[ia[0]].vsiten.n;
1264 copy_rvec(x[av], xv);
1266 for (i = 0; i < n3; i += 3)
/* Graph path: shift index from the molecule graph. */
1271 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, av), di);
/* PBC path: shift index from the minimum-image displacement. */
1276 siv = pbc_dx_aiuc(pbc, x[ai], xv, dx);
1282 a = ip[ia[i]].vsiten.a;
1283 svmul(a, f[av], fi);
1284 rvec_inc(f[ai], fi);
1285 if (fshift && siv != CENTRAL)
1287 rvec_inc(fshift[siv], fi);
1288 rvec_dec(fshift[CENTRAL], fi);
/* Count the number of virtual sites of the given function type in ilist.
 * F_VSITEN entries occupy 3 iatoms each; all other vsite types occupy
 * 1 + nratoms iatoms per interaction.
 */
1297 static int vsite_count(const t_ilist *ilist, int ftype)
1299 if (ftype == F_VSITEN)
1301 return ilist[ftype].nr/3;
1305 return ilist[ftype].nr/(1 + interaction_function[ftype].nratoms);
/* Spread all vsite forces in ilist for one thread: iterate function
 * types BACKWARDS so that vsites constructed from other (lower-type)
 * vsites are spread before their constructing vsites, dispatch to the
 * per-type spread routine, and clear the spread vsite's own force.
 * NOTE(review): some declarations, switch labels and increment code are
 * missing from this view.
 */
1309 static void spread_vsite_f_thread(gmx_vsite_t *vsite,
1310 rvec x[], rvec f[], rvec *fshift,
1311 gmx_bool VirCorr, matrix dxdf,
1312 t_iparams ip[], t_ilist ilist[],
1313 t_graph *g, t_pbc *pbc_null)
1317 int i, inc, m, nra, nr, tp, ftype;
/* Without charge groups every vsite can use full PBC directly. */
1327 bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
1329 /* this loop goes backwards to be able to build *
1330 * higher type vsites from lower types */
1333 for (ftype = F_NRE-1; (ftype >= 0); ftype--)
1335 if ((interaction_function[ftype].flags & IF_VSITE) &&
1336 ilist[ftype].nr > 0)
1338 nra = interaction_function[ftype].nratoms;
1340 nr = ilist[ftype].nr;
1341 ia = ilist[ftype].iatoms;
1345 pbc_null2 = pbc_null;
1347 else if (pbc_null != NULL)
1349 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
1352 for (i = 0; i < nr; )
1354 if (vsite_pbc != NULL)
/* Values > -2 mean this vsite needs PBC treatment. */
1356 if (vsite_pbc[i/(1+nra)] > -2)
1358 pbc_null2 = pbc_null;
1368 /* Constants for constructing */
1369 a1 = ip[tp].vsite.a;
1370 /* Construct the vsite depending on type */
1374 spread_vsite2(ia, a1, x, f, fshift, pbc_null2, g);
1377 b1 = ip[tp].vsite.b;
1378 spread_vsite3(ia, a1, b1, x, f, fshift, pbc_null2, g);
1381 b1 = ip[tp].vsite.b;
1382 spread_vsite3FD(ia, a1, b1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1385 b1 = ip[tp].vsite.b;
1386 spread_vsite3FAD(ia, a1, b1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1389 b1 = ip[tp].vsite.b;
1390 c1 = ip[tp].vsite.c;
1391 spread_vsite3OUT(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1394 b1 = ip[tp].vsite.b;
1395 c1 = ip[tp].vsite.c;
1396 spread_vsite4FD(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1399 b1 = ip[tp].vsite.b;
1400 c1 = ip[tp].vsite.c;
1401 spread_vsite4FDN(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1404 inc = spread_vsiten(ia, ip, x, f, fshift, pbc_null2, g);
1407 gmx_fatal(FARGS, "No such vsite type %d in %s, line %d",
1408 ftype, __FILE__, __LINE__);
/* The vsite's force has been spread; zero it on the vsite itself. */
1410 clear_rvec(f[ia[1]]);
1412 /* Increment loop variables */
/* Top-level vsite force spreading: set up PBC for inter-cg vsites,
 * clear forces on non-home vsites under DD, run spread_vsite_f_thread
 * serially or via OpenMP with per-thread fshift buffers that are then
 * reduced, optionally fold the dxdf virial correction into vir,
 * communicate spread forces under DD, and update flop counters.
 * NOTE(review): brace structure and several argument-list continuations
 * are missing from this view.
 */
1420 void spread_vsite_f(gmx_vsite_t *vsite,
1421 rvec x[], rvec f[], rvec *fshift,
1422 gmx_bool VirCorr, matrix vir,
1423 t_nrnb *nrnb, t_idef *idef,
1424 int ePBC, gmx_bool bMolPBC, t_graph *g, matrix box,
1427 t_pbc pbc, *pbc_null;
1430 /* We only need to do pbc when we have inter-cg vsites */
1431 if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite)
1433 /* This is wasting some CPU time as we now do this multiple times
1434 * per MD step. But how often do we have vsites with full pbc?
1436 pbc_null = set_pbc_dd(&pbc, ePBC, cr->dd, FALSE, box);
1443 if (DOMAINDECOMP(cr))
1445 dd_clear_f_vsites(cr->dd, f);
1448 if (vsite->nthreads == 1)
1450 spread_vsite_f_thread(vsite,
1452 VirCorr, vsite->tdata[0].dxdf,
1453 idef->iparams, idef->il,
1458 /* First spread the vsites that might depend on other vsites */
1459 spread_vsite_f_thread(vsite,
1461 VirCorr, vsite->tdata[vsite->nthreads].dxdf,
1463 vsite->tdata[vsite->nthreads].ilist,
1466 #pragma omp parallel num_threads(vsite->nthreads)
1471 thread = gmx_omp_get_thread_num();
/* Thread 0 writes into the caller's fshift; others use local buffers. */
1473 if (thread == 0 || fshift == NULL)
1481 fshift_t = vsite->tdata[thread].fshift;
1483 for (i = 0; i < SHIFTS; i++)
1485 clear_rvec(fshift_t[i]);
1489 spread_vsite_f_thread(vsite,
1491 VirCorr, vsite->tdata[thread].dxdf,
1493 vsite->tdata[thread].ilist,
/* Reduce the per-thread shift forces into the caller's fshift. */
1501 for (th = 1; th < vsite->nthreads; th++)
1503 for (i = 0; i < SHIFTS; i++)
1505 rvec_inc(fshift[i], vsite->tdata[th].fshift[i]);
/* Fold the accumulated dxdf correction into the virial (factor -0.5). */
1515 for (th = 0; th < (vsite->nthreads == 1 ? 1 : vsite->nthreads+1); th++)
1517 for (i = 0; i < DIM; i++)
1519 for (j = 0; j < DIM; j++)
1521 vir[i][j] += -0.5*vsite->tdata[th].dxdf[i][j];
1527 if (DOMAINDECOMP(cr))
1529 dd_move_f_vsites(cr->dd, f, fshift);
/* Account flops per vsite type. */
1532 inc_nrnb(nrnb, eNR_VSITE2, vsite_count(idef->il, F_VSITE2));
1533 inc_nrnb(nrnb, eNR_VSITE3, vsite_count(idef->il, F_VSITE3));
1534 inc_nrnb(nrnb, eNR_VSITE3FD, vsite_count(idef->il, F_VSITE3FD));
1535 inc_nrnb(nrnb, eNR_VSITE3FAD, vsite_count(idef->il, F_VSITE3FAD));
1536 inc_nrnb(nrnb, eNR_VSITE3OUT, vsite_count(idef->il, F_VSITE3OUT));
1537 inc_nrnb(nrnb, eNR_VSITE4FD, vsite_count(idef->il, F_VSITE4FD));
1538 inc_nrnb(nrnb, eNR_VSITE4FDN, vsite_count(idef->il, F_VSITE4FDN));
1539 inc_nrnb(nrnb, eNR_VSITEN, vsite_count(idef->il, F_VSITEN));
/* Build and return a newly allocated atom -> charge-group index map for
 * the given charge-group block. Caller owns the returned array.
 * NOTE(review): the inner-loop assignment (a2cg[i] = cg, presumably) and
 * the return statement are missing from this view.
 */
1542 static int *atom2cg(t_block *cgs)
/* One entry per atom: cgs->index[cgs->nr] is the total atom count. */
1546 snew(a2cg, cgs->index[cgs->nr]);
1547 for (cg = 0; cg < cgs->nr; cg++)
1549 for (i = cgs->index[cg]; i < cgs->index[cg+1]; i++)
/* Count (over all molecules) the vsites whose constructing atoms lie in
 * a different charge group than the vsite itself. Also reports via
 * *bHaveChargeGroups whether any molecule type has real charge groups
 * (fewer cgs than atoms).
 */
1558 static int count_intercg_vsite(gmx_mtop_t *mtop,
1559 gmx_bool *bHaveChargeGroups)
1561 int mb, mt, ftype, nral, i, cg, a;
1562 gmx_molblock_t *molb;
1563 gmx_moltype_t *molt;
1567 int n_intercg_vsite;
1569 *bHaveChargeGroups = FALSE;
1571 n_intercg_vsite = 0;
1572 for (mb = 0; mb < mtop->nmolblock; mb++)
1574 molb = &mtop->molblock[mb];
1575 molt = &mtop->moltype[molb->type];
/* More atoms than charge groups means some cg holds >1 atom. */
1577 if (molt->cgs.nr < molt->atoms.nr)
1579 *bHaveChargeGroups = TRUE;
1582 a2cg = atom2cg(&molt->cgs);
1583 for (ftype = 0; ftype < F_NRE; ftype++)
1585 if (interaction_function[ftype].flags & IF_VSITE)
1588 il = &molt->ilist[ftype];
1590 for (i = 0; i < il->nr; i += 1+nral)
1593 for (a = 1; a < nral; a++)
/* Any constructing atom in a different cg makes it inter-cg;
 * the count is per molecule, hence scaled by nmol. */
1595 if (a2cg[ia[1+a]] != cg)
1597 n_intercg_vsite += molb->nmol;
1607 return n_intercg_vsite;
/* Build, for each vsite function type, an array that tells how each
 * vsite should be PBC-treated:
 *   -2 : vsite and all constructing atoms in the same cg (no PBC needed)
 *   -1 : inter-cg but resolvable (value refined below)
 *  >=0 : atom index whose periodic image the vsite should match
 * Either atom (topology t_atom array) or md (t_mdatoms) supplies the
 * particle types. Caller owns the returned arrays.
 * NOTE(review): many interior lines (loop heads over interactions, the
 * pbc_set updates, the return statement) are missing from this view.
 */
1610 static int **get_vsite_pbc(t_iparams *iparams, t_ilist *ilist,
1611 t_atom *atom, t_mdatoms *md,
1612 t_block *cgs, int *a2cg)
1614 int ftype, nral, i, j, vsi, vsite, cg_v, cg_c, a, nc3 = 0;
1617 int **vsite_pbc, *vsite_pbc_f;
1619 gmx_bool bViteOnlyCG_and_FirstAtom;
1621 /* Make an array that tells if the pbc of an atom is set */
1622 snew(pbc_set, cgs->index[cgs->nr]);
1623 /* PBC is set for all non vsites */
1624 for (a = 0; a < cgs->index[cgs->nr]; a++)
1626 if ((atom && atom[a].ptype != eptVSite) ||
1627 (md && md->ptype[a] != eptVSite))
/* One sub-array per vsite function type (F_VSITE2..F_VSITEN). */
1633 snew(vsite_pbc, F_VSITEN-F_VSITE2+1);
1635 for (ftype = 0; ftype < F_NRE; ftype++)
1637 if (interaction_function[ftype].flags & IF_VSITE)
1643 snew(vsite_pbc[ftype-F_VSITE2], il->nr/(1+nral));
1644 vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
1652 /* A value of -2 signals that this vsite and its contructing
1653 * atoms are all within the same cg, so no pbc is required.
1655 vsite_pbc_f[vsi] = -2;
1656 /* Check if constructing atoms are outside the vsite's cg */
1658 if (ftype == F_VSITEN)
/* VSITEN: constructing atoms come in triplets in the iatom list. */
1660 nc3 = 3*iparams[ia[i]].vsiten.n;
1661 for (j = 0; j < nc3; j += 3)
1663 if (a2cg[ia[i+j+2]] != cg_v)
1665 vsite_pbc_f[vsi] = -1;
1671 for (a = 1; a < nral; a++)
1673 if (a2cg[ia[i+1+a]] != cg_v)
1675 vsite_pbc_f[vsi] = -1;
1679 if (vsite_pbc_f[vsi] == -1)
1681 /* Check if this is the first processed atom of a vsite only cg */
1682 bViteOnlyCG_and_FirstAtom = TRUE;
1683 for (a = cgs->index[cg_v]; a < cgs->index[cg_v+1]; a++)
1685 /* Non-vsites already have pbc set, so simply check for pbc_set */
1688 bViteOnlyCG_and_FirstAtom = FALSE;
1692 if (bViteOnlyCG_and_FirstAtom)
1694 /* First processed atom of a vsite only charge group.
1695 * The pbc of the input coordinates to construct_vsites
1696 * should be preserved.
1698 vsite_pbc_f[vsi] = vsite;
1700 else if (cg_v != a2cg[ia[1+i+1]])
1702 /* This vsite has a different charge group index
1703 * than it's first constructing atom
1704 * and the charge group has more than one atom,
1705 * search for the first normal particle
1706 * or vsite that already had its pbc defined.
1707 * If nothing is found, use full pbc for this vsite.
1709 for (a = cgs->index[cg_v]; a < cgs->index[cg_v+1]; a++)
1711 if (a != vsite && pbc_set[a])
1713 vsite_pbc_f[vsi] = a;
1716 fprintf(debug, "vsite %d match pbc with atom %d\n",
1724 fprintf(debug, "vsite atom %d cg %d - %d pbc atom %d\n",
1725 vsite+1, cgs->index[cg_v]+1, cgs->index[cg_v+1],
1726 vsite_pbc_f[vsi]+1);
1730 if (ftype == F_VSITEN)
1732 /* The other entries in vsite_pbc_f are not used for center vsites */
1740 /* This vsite now has its pbc defined */
/* Allocate and initialize the gmx_vsite_t bookkeeping structure:
 * count vsites in the topology, detect inter-cg vsites, precompute
 * per-moltype PBC reference arrays when charge groups and DD are in
 * play, and set up per-thread data. Returns the new structure
 * (presumably NULL when there are no vsites — the early-return lines
 * are missing from this view).
 */
1752 gmx_vsite_t *init_vsite(gmx_mtop_t *mtop, t_commrec *cr,
1753 gmx_bool bSerial_NoPBC)
1759 gmx_moltype_t *molt;
1762 /* check if there are vsites */
1764 for (i = 0; i < F_NRE; i++)
1766 if (interaction_function[i].flags & IF_VSITE)
1768 nvsite += gmx_mtop_ftype_count(mtop, i);
1779 vsite->n_intercg_vsite = count_intercg_vsite(mtop,
1780 &vsite->bHaveChargeGroups);
1782 /* If we don't have charge groups, the vsite follows its own pbc */
1783 if (!bSerial_NoPBC &&
1784 vsite->bHaveChargeGroups &&
1785 vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr))
1787 vsite->nvsite_pbc_molt = mtop->nmoltype;
1788 snew(vsite->vsite_pbc_molt, vsite->nvsite_pbc_molt);
1789 for (mt = 0; mt < mtop->nmoltype; mt++)
1791 molt = &mtop->moltype[mt];
1792 /* Make an atom to charge group index */
1793 a2cg = atom2cg(&molt->cgs);
1794 vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
1796 molt->atoms.atom, NULL,
/* Local (per-rank) PBC arrays, sized lazily later. */
1801 snew(vsite->vsite_pbc_loc_nalloc, F_VSITEN-F_VSITE2+1);
1802 snew(vsite->vsite_pbc_loc, F_VSITEN-F_VSITE2+1);
1807 vsite->nthreads = 1;
1811 vsite->nthreads = gmx_omp_nthreads_get(emntVSITE);
1815 /* We need one extra thread data structure for the overlap vsites */
1816 snew(vsite->tdata, vsite->nthreads+1);
1819 vsite->th_ind = NULL;
1820 vsite->th_ind_nalloc = 0;
/* Grow the thread-local interaction lists in vsite_th so that, for every
 * vsite function type, they can hold all entries of the corresponding
 * global list in ilist, then reset their counts to zero so the lists can
 * be refilled by split_vsites_over_threads().
 */
1825 static void prepare_vsite_thread(const t_ilist *ilist,
1826 gmx_vsite_thread_t *vsite_th)
1830 for (ftype = 0; ftype < F_NRE; ftype++)
1832 if (interaction_function[ftype].flags & IF_VSITE)
1834 if (ilist[ftype].nr > vsite_th->ilist[ftype].nalloc)
/* Over-allocate to limit reallocations when the list grows over time */
1836 vsite_th->ilist[ftype].nalloc = over_alloc_large(ilist[ftype].nr);
1837 srenew(vsite_th->ilist[ftype].iatoms, vsite_th->ilist[ftype].nalloc);
/* Keep the buffer, but mark the list as empty for refilling */
1840 vsite_th->ilist[ftype].nr = 0;
/* Distribute the vsite interactions in ilist over the worker threads.
 *
 * Atoms 0..vsite_atom_range are divided uniformly over vsite->nthreads
 * threads; each vsite is assigned to the thread owning its first
 * constructing atom, unless it also depends on atoms owned by another
 * thread, in which case it goes into the extra "overlap" batch at index
 * vsite->nthreads, which is processed by the master thread alone.
 *
 * NOTE(review): this view of the file is elided (braces and some lines
 * are missing), so comments describe only the visible logic.
 *
 * ilist       - global interaction lists (indexed by function type)
 * ip          - interaction parameters; used for F_VSITEN entry counts
 * mdatoms     - atom data; homenr/nr bound the atom range, ptype marks vsites
 * bLimitRange - when TRUE (no DD), tighten the atom range to the highest
 *               atom referenced by any vsite instead of using homenr
 */
1845 void split_vsites_over_threads(const t_ilist *ilist,
1846 const t_iparams *ip,
1847 const t_mdatoms *mdatoms,
1848 gmx_bool bLimitRange,
1852 int vsite_atom_range, natperthread;
1857 int nral1, inc, i, j;
/* Nothing to distribute with a single thread */
1859 if (vsite->nthreads == 1)
1865 #pragma omp parallel for num_threads(vsite->nthreads) schedule(static)
1866 for (th = 0; th < vsite->nthreads; th++)
1868 prepare_vsite_thread(ilist, &vsite->tdata[th]);
1870 /* Master thread does the (potential) overlap vsites */
1871 prepare_vsite_thread(ilist, &vsite->tdata[vsite->nthreads]);
1873 /* The current way of distributing the vsites over threads is primitive.
1874 * We divide the atom range 0 - natoms_in_vsite uniformly over threads,
1875 * without taking into account how the vsites are distributed.
1876 * Without domain decomposition we have bLimitRange=TRUE and we at least
1877 * tighten the upper bound of the range (useful for common systems
1878 * such as a vsite-protein in 3-site water).
/* Determine the highest atom index involved in any vsite (bLimitRange
 * path; the branch structure is elided in this view). */
1882 vsite_atom_range = -1;
1883 for (ftype = 0; ftype < F_NRE; ftype++)
1885 if (interaction_function[ftype].flags & IF_VSITE)
1887 if (ftype != F_VSITEN)
/* iatoms layout: parameter index followed by NRAL(ftype) atoms */
1889 nral1 = 1 + NRAL(ftype);
1890 iat = ilist[ftype].iatoms;
1891 for (i = 0; i < ilist[ftype].nr; i += nral1)
1893 for (j = i + 1; j < i + nral1; j++)
1895 vsite_atom_range = max(vsite_atom_range, iat[j]);
/* F_VSITEN has a variable number of constructing atoms, stored as
 * vsiten.n sub-entries of 3 ints each (param, vsite atom, constructing
 * atom); walk them with the stride below. */
1903 iat = ilist[ftype].iatoms;
1906 while (i < ilist[ftype].nr)
1908 /* The 3 below is from 1+NRAL(ftype)=3 */
1909 vs_ind_end = i + ip[iat[i]].vsiten.n*3;
1911 vsite_atom_range = max(vsite_atom_range, iat[i+1]);
1912 while (i < vs_ind_end)
1914 vsite_atom_range = max(vsite_atom_range, iat[i+2]);
/* Without bLimitRange, use all home atoms as the range (DD case) */
1925 vsite_atom_range = mdatoms->homenr;
/* Ceiling division: every thread owns natperthread consecutive atoms */
1927 natperthread = (vsite_atom_range + vsite->nthreads - 1)/vsite->nthreads;
1931 fprintf(debug, "virtual site thread dist: natoms %d, range %d, natperthread %d\n", mdatoms->nr, vsite_atom_range, natperthread);
1934 /* To simplify the vsite assignment, we make an index which tells us
1935 * to which thread particles, both non-vsites and vsites, are assigned.
1937 if (mdatoms->nr > vsite->th_ind_nalloc)
1939 vsite->th_ind_nalloc = over_alloc_large(mdatoms->nr);
1940 srenew(vsite->th_ind, vsite->th_ind_nalloc);
1942 th_ind = vsite->th_ind;
1944 for (i = 0; i < mdatoms->nr; i++)
1946 if (mdatoms->ptype[i] == eptVSite)
1948 /* vsites are not assigned to a thread yet */
/* (the sentinel value written here is elided from this view) */
1953 /* assign non-vsite particles to thread th */
/* Advance to the next thread at each natperthread boundary */
1956 if (i == (th + 1)*natperthread && th < vsite->nthreads)
/* Second pass: assign each vsite to a thread and copy its entry into
 * that thread's local ilist. */
1962 for (ftype = 0; ftype < F_NRE; ftype++)
1964 if (interaction_function[ftype].flags & IF_VSITE)
1966 nral1 = 1 + NRAL(ftype);
1968 iat = ilist[ftype].iatoms;
1969 for (i = 0; i < ilist[ftype].nr; )
/* iat[1+i] is the vsite atom itself; its owning thread by atom range */
1971 th = iat[1+i]/natperthread;
1972 /* We would like to assign this vsite the thread th,
1973 * but it might depend on atoms outside the atom range of th
1974 * or on another vsite not assigned to thread th.
1976 if (ftype != F_VSITEN)
1978 for (j = i + 2; j < i + nral1; j++)
1980 if (th_ind[iat[j]] != th)
1982 /* Some constructing atoms are not assigned to
1983 * thread th, move this vsite to a separate batch.
/* Index vsite->nthreads is the master-only overlap batch */
1985 th = vsite->nthreads;
1991 /* The 3 below is from 1+NRAL(ftype)=3 */
1992 inc = ip[iat[i]].vsiten.n*3;
1993 for (j = i + 2; j < i + inc; j += 3)
1995 if (th_ind[iat[j]] != th)
1997 th = vsite->nthreads;
2001 /* Copy this vsite to the thread data struct of thread th */
2002 il_th = &vsite->tdata[th].ilist[ftype];
2003 for (j = i; j < i + inc; j++)
2005 il_th->iatoms[il_th->nr++] = iat[j];
2007 /* Update this vsite's thread index entry */
2008 th_ind[iat[1+i]] = th;
/* Debug output: per-type distribution of vsites over the threads,
 * including the overlap batch (index vsite->nthreads). */
2017 for (ftype = 0; ftype < F_NRE; ftype++)
2019 if ((interaction_function[ftype].flags & IF_VSITE) &&
2020 ilist[ftype].nr > 0)
2022 fprintf(debug, "%-20s thread dist:",
2023 interaction_function[ftype].longname);
2024 for (th = 0; th < vsite->nthreads+1; th++)
2026 fprintf(debug, " %4d", vsite->tdata[th].ilist[ftype].nr);
2028 fprintf(debug, "\n");
2034 void set_vsite_top(gmx_vsite_t *vsite, gmx_localtop_t *top, t_mdatoms *md,
2039 if (vsite->n_intercg_vsite > 0)
2041 if (vsite->bHaveChargeGroups)
2043 /* Make an atom to charge group index */
2044 a2cg = atom2cg(&top->cgs);
2045 vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
2046 top->idef.il, NULL, md,
2053 if (vsite->nthreads > 1)
2055 if (vsite->bHaveChargeGroups)
2057 gmx_fatal(FARGS, "The combination of threading, virtual sites and charge groups is not implemented");
2060 split_vsites_over_threads(top->idef.il, top->idef.iparams,
2061 md, !DOMAINDECOMP(cr), vsite);