2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
55 #include "mtop_util.h"
56 #include "gmx_omp_nthreads.h"
59 /* Routines to send/receive coordinates and force
60 * of constructing atoms.
63 static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
69 sendbuf = vsitecomm->send_buf;
70 recvbuf = vsitecomm->recv_buf;
73 /* Prepare pulse left by copying to send buffer */
74 for (i = 0; i < vsitecomm->left_export_nconstruct; i++)
76 ia = vsitecomm->left_export_construct[i];
77 copy_rvec(x[ia], sendbuf[i]);
80 /* Pulse coordinates left */
81 gmx_tx_rx_real(cr, GMX_LEFT, (real *)sendbuf, 3*vsitecomm->left_export_nconstruct, GMX_RIGHT, (real *)recvbuf, 3*vsitecomm->right_import_nconstruct);
83 /* Copy from receive buffer to coordinate array */
84 for (i = 0; i < vsitecomm->right_import_nconstruct; i++)
86 ia = vsitecomm->right_import_construct[i];
87 copy_rvec(recvbuf[i], x[ia]);
90 /* Prepare pulse right by copying to send buffer */
91 for (i = 0; i < vsitecomm->right_export_nconstruct; i++)
93 ia = vsitecomm->right_export_construct[i];
94 copy_rvec(x[ia], sendbuf[i]);
97 /* Pulse coordinates right */
98 gmx_tx_rx_real(cr, GMX_RIGHT, (real *)sendbuf, 3*vsitecomm->right_export_nconstruct, GMX_LEFT, (real *)recvbuf, 3*vsitecomm->left_import_nconstruct);
100 /* Copy from receive buffer to coordinate array */
101 for (i = 0; i < vsitecomm->left_import_nconstruct; i++)
103 ia = vsitecomm->left_import_construct[i];
104 copy_rvec(recvbuf[i], x[ia]);
109 static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
115 sendbuf = vsitecomm->send_buf;
116 recvbuf = vsitecomm->recv_buf;
118 /* Prepare pulse right by copying to send buffer */
119 for (i = 0; i < vsitecomm->right_import_nconstruct; i++)
121 ia = vsitecomm->right_import_construct[i];
122 copy_rvec(f[ia], sendbuf[i]);
123 clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
126 /* Pulse forces right */
127 gmx_tx_rx_real(cr, GMX_RIGHT, (real *)sendbuf, 3*vsitecomm->right_import_nconstruct, GMX_LEFT, (real *)recvbuf, 3*vsitecomm->left_export_nconstruct);
129 /* Copy from receive buffer to coordinate array */
130 for (i = 0; i < vsitecomm->left_export_nconstruct; i++)
132 ia = vsitecomm->left_export_construct[i];
133 rvec_inc(f[ia], recvbuf[i]);
136 /* Prepare pulse left by copying to send buffer */
137 for (i = 0; i < vsitecomm->left_import_nconstruct; i++)
139 ia = vsitecomm->left_import_construct[i];
140 copy_rvec(f[ia], sendbuf[i]);
141 clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
144 /* Pulse coordinates left */
145 gmx_tx_rx_real(cr, GMX_LEFT, (real *)sendbuf, 3*vsitecomm->left_import_nconstruct, GMX_RIGHT, (real *)recvbuf, 3*vsitecomm->right_export_nconstruct);
147 /* Copy from receive buffer to coordinate array */
148 for (i = 0; i < vsitecomm->right_export_nconstruct; i++)
150 ia = vsitecomm->right_export_construct[i];
151 rvec_inc(f[ia], recvbuf[i]);
154 /* All forces are now on the home processors */
159 pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
163 for (i = 0; i < vsitecomm->left_import_nconstruct; i++)
165 ia = vsitecomm->left_import_construct[i];
168 for (i = 0; i < vsitecomm->right_import_nconstruct; i++)
170 ia = vsitecomm->right_import_construct[i];
177 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
181 return pbc_dx_aiuc(pbc, xi, xj, dx);
185 rvec_sub(xi, xj, dx);
190 /* Vsite construction routines */
192 static void constr_vsite2(rvec xi, rvec xj, rvec x, real a, t_pbc *pbc)
202 pbc_dx_aiuc(pbc, xj, xi, dx);
203 x[XX] = xi[XX] + a*dx[XX];
204 x[YY] = xi[YY] + a*dx[YY];
205 x[ZZ] = xi[ZZ] + a*dx[ZZ];
209 x[XX] = b*xi[XX] + a*xj[XX];
210 x[YY] = b*xi[YY] + a*xj[YY];
211 x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
215 /* TOTAL: 10 flops */
218 static void constr_vsite3(rvec xi, rvec xj, rvec xk, rvec x, real a, real b,
229 pbc_dx_aiuc(pbc, xj, xi, dxj);
230 pbc_dx_aiuc(pbc, xk, xi, dxk);
231 x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
232 x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
233 x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
237 x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
238 x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
239 x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
243 /* TOTAL: 17 flops */
246 static void constr_vsite3FD(rvec xi, rvec xj, rvec xk, rvec x, real a, real b,
252 pbc_rvec_sub(pbc, xj, xi, xij);
253 pbc_rvec_sub(pbc, xk, xj, xjk);
256 /* temp goes from i to a point on the line jk */
257 temp[XX] = xij[XX] + a*xjk[XX];
258 temp[YY] = xij[YY] + a*xjk[YY];
259 temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
262 c = b*gmx_invsqrt(iprod(temp, temp));
265 x[XX] = xi[XX] + c*temp[XX];
266 x[YY] = xi[YY] + c*temp[YY];
267 x[ZZ] = xi[ZZ] + c*temp[ZZ];
270 /* TOTAL: 34 flops */
273 static void constr_vsite3FAD(rvec xi, rvec xj, rvec xk, rvec x, real a, real b, t_pbc *pbc)
276 real a1, b1, c1, invdij;
278 pbc_rvec_sub(pbc, xj, xi, xij);
279 pbc_rvec_sub(pbc, xk, xj, xjk);
282 invdij = gmx_invsqrt(iprod(xij, xij));
283 c1 = invdij * invdij * iprod(xij, xjk);
284 xp[XX] = xjk[XX] - c1*xij[XX];
285 xp[YY] = xjk[YY] - c1*xij[YY];
286 xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
288 b1 = b*gmx_invsqrt(iprod(xp, xp));
291 x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
292 x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
293 x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
296 /* TOTAL: 63 flops */
299 static void constr_vsite3OUT(rvec xi, rvec xj, rvec xk, rvec x,
300 real a, real b, real c, t_pbc *pbc)
304 pbc_rvec_sub(pbc, xj, xi, xij);
305 pbc_rvec_sub(pbc, xk, xi, xik);
306 cprod(xij, xik, temp);
309 x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
310 x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
311 x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
314 /* TOTAL: 33 flops */
317 static void constr_vsite4FD(rvec xi, rvec xj, rvec xk, rvec xl, rvec x,
318 real a, real b, real c, t_pbc *pbc)
320 rvec xij, xjk, xjl, temp;
323 pbc_rvec_sub(pbc, xj, xi, xij);
324 pbc_rvec_sub(pbc, xk, xj, xjk);
325 pbc_rvec_sub(pbc, xl, xj, xjl);
328 /* temp goes from i to a point on the plane jkl */
329 temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
330 temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
331 temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
334 d = c*gmx_invsqrt(iprod(temp, temp));
337 x[XX] = xi[XX] + d*temp[XX];
338 x[YY] = xi[YY] + d*temp[YY];
339 x[ZZ] = xi[ZZ] + d*temp[ZZ];
342 /* TOTAL: 43 flops */
346 static void constr_vsite4FDN(rvec xi, rvec xj, rvec xk, rvec xl, rvec x,
347 real a, real b, real c, t_pbc *pbc)
349 rvec xij, xik, xil, ra, rb, rja, rjb, rm;
352 pbc_rvec_sub(pbc, xj, xi, xij);
353 pbc_rvec_sub(pbc, xk, xi, xik);
354 pbc_rvec_sub(pbc, xl, xi, xil);
367 rvec_sub(ra, xij, rja);
368 rvec_sub(rb, xij, rjb);
374 d = c*gmx_invsqrt(norm2(rm));
377 x[XX] = xi[XX] + d*rm[XX];
378 x[YY] = xi[YY] + d*rm[YY];
379 x[ZZ] = xi[ZZ] + d*rm[ZZ];
382 /* TOTAL: 47 flops */
386 static int constr_vsiten(t_iatom *ia, t_iparams ip[],
394 n3 = 3*ip[ia[0]].vsiten.n;
397 copy_rvec(x[ai], x1);
399 for (i = 3; i < n3; i += 3)
402 a = ip[ia[i]].vsiten.a;
405 pbc_dx_aiuc(pbc, x[ai], x1, dx);
409 rvec_sub(x[ai], x1, dx);
411 dsum[XX] += a*dx[XX];
412 dsum[YY] += a*dx[YY];
413 dsum[ZZ] += a*dx[ZZ];
417 x[av][XX] = x1[XX] + dsum[XX];
418 x[av][YY] = x1[YY] + dsum[YY];
419 x[av][ZZ] = x1[ZZ] + dsum[ZZ];
425 void construct_vsites_thread(gmx_vsite_t *vsite,
426 rvec x[], t_nrnb *nrnb,
428 t_iparams ip[], t_ilist ilist[],
432 rvec xpbc, xv, vv, dx;
433 real a1, b1, c1, inv_dt;
434 int i, inc, ii, nra, nr, tp, ftype;
435 t_iatom avsite, ai, aj, ak, al, pbc_atom;
438 int *vsite_pbc, ishift;
439 rvec reftmp, vtmp, rtmp;
450 bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
454 for (ftype = 0; (ftype < F_NRE); ftype++)
456 if ((interaction_function[ftype].flags & IF_VSITE) &&
459 nra = interaction_function[ftype].nratoms;
461 nr = ilist[ftype].nr;
462 ia = ilist[ftype].iatoms;
466 pbc_null2 = pbc_null;
468 else if (pbc_null != NULL)
470 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
473 for (i = 0; i < nr; )
477 /* The vsite and constructing atoms */
481 /* Constants for constructing vsites */
483 /* Check what kind of pbc we need to use */
486 /* No charge groups, vsite follows its own pbc */
488 copy_rvec(x[avsite], xpbc);
490 else if (vsite_pbc != NULL)
492 pbc_atom = vsite_pbc[i/(1+nra)];
497 /* We need to copy the coordinates here,
498 * single for single atom cg's pbc_atom
499 * is the vsite itself.
501 copy_rvec(x[pbc_atom], xpbc);
503 pbc_null2 = pbc_null;
514 /* Copy the old position */
515 copy_rvec(x[avsite], xv);
517 /* Construct the vsite depending on type */
522 constr_vsite2(x[ai], x[aj], x[avsite], a1, pbc_null2);
528 constr_vsite3(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
534 constr_vsite3FD(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
540 constr_vsite3FAD(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
547 constr_vsite3OUT(x[ai], x[aj], x[ak], x[avsite], a1, b1, c1, pbc_null2);
555 constr_vsite4FD(x[ai], x[aj], x[ak], x[al], x[avsite], a1, b1, c1,
564 constr_vsite4FDN(x[ai], x[aj], x[ak], x[al], x[avsite], a1, b1, c1,
568 inc = constr_vsiten(ia, ip, x, pbc_null2);
571 gmx_fatal(FARGS, "No such vsite type %d in %s, line %d",
572 ftype, __FILE__, __LINE__);
577 /* Match the pbc of this vsite to the rest of its charge group */
578 ishift = pbc_dx_aiuc(pbc_null, x[avsite], xpbc, dx);
579 if (ishift != CENTRAL)
581 rvec_add(xpbc, dx, x[avsite]);
586 /* Calculate velocity of vsite... */
587 rvec_sub(x[avsite], xv, vv);
588 svmul(inv_dt, vv, v[avsite]);
591 /* Increment loop variables */
599 void construct_vsites(FILE *log, gmx_vsite_t *vsite,
600 rvec x[], t_nrnb *nrnb,
602 t_iparams ip[], t_ilist ilist[],
603 int ePBC, gmx_bool bMolPBC, t_graph *graph,
604 t_commrec *cr, matrix box)
606 t_pbc pbc, *pbc_null;
610 bDomDec = cr && DOMAINDECOMP(cr);
612 /* We only need to do pbc when we have inter-cg vsites */
613 if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite)
615 /* This is wasting some CPU time as we now do this multiple times
616 * per MD step. But how often do we have vsites with full pbc?
618 pbc_null = set_pbc_dd(&pbc, ePBC, cr != NULL ? cr->dd : NULL, FALSE, box);
629 dd_move_x_vsites(cr->dd, box, x);
631 else if (vsite->bPDvsitecomm)
633 /* I'm not sure whether the periodicity and shift are guaranteed
634 * to be consistent between different nodes when running e.g. polymers
635 * in parallel. In this special case we thus unshift/shift,
636 * but only when necessary. This is to make sure the coordinates
637 * we move don't end up a box away...
641 unshift_self(graph, box, x);
644 move_construct_x(vsite->vsitecomm, x, cr);
648 shift_self(graph, box, x);
653 if (vsite->nthreads == 1)
655 construct_vsites_thread(vsite,
662 #pragma omp parallel num_threads(vsite->nthreads)
664 construct_vsites_thread(vsite,
666 ip, vsite->tdata[gmx_omp_get_thread_num()].ilist,
669 /* Now we can construct the vsites that might depend on other vsites */
670 construct_vsites_thread(vsite,
672 ip, vsite->tdata[vsite->nthreads].ilist,
677 static void spread_vsite2(t_iatom ia[], real a,
678 rvec x[], rvec f[], rvec fshift[],
679 t_pbc *pbc, t_graph *g)
691 svmul(1-a, f[av], fi);
692 svmul( a, f[av], fj);
701 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, av), di);
703 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), di);
708 siv = pbc_dx_aiuc(pbc, x[ai], x[av], dx);
709 sij = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
717 if (fshift && (siv != CENTRAL || sij != CENTRAL))
719 rvec_inc(fshift[siv], f[av]);
720 rvec_dec(fshift[CENTRAL], fi);
721 rvec_dec(fshift[sij], fj);
724 /* TOTAL: 13 flops */
727 void construct_vsites_mtop(FILE *log, gmx_vsite_t *vsite,
728 gmx_mtop_t *mtop, rvec x[])
731 gmx_molblock_t *molb;
735 for (mb = 0; mb < mtop->nmolblock; mb++)
737 molb = &mtop->molblock[mb];
738 molt = &mtop->moltype[molb->type];
739 for (mol = 0; mol < molb->nmol; mol++)
741 construct_vsites(log, vsite, x+as, NULL, 0.0, NULL,
742 mtop->ffparams.iparams, molt->ilist,
743 epbcNONE, TRUE, NULL, NULL, NULL);
744 as += molt->atoms.nr;
749 static void spread_vsite3(t_iatom ia[], real a, real b,
750 rvec x[], rvec f[], rvec fshift[],
751 t_pbc *pbc, t_graph *g)
754 atom_id av, ai, aj, ak;
763 svmul(1-a-b, f[av], fi);
764 svmul( a, f[av], fj);
765 svmul( b, f[av], fk);
775 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ia[1]), di);
777 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), di);
779 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), di);
784 siv = pbc_dx_aiuc(pbc, x[ai], x[av], dx);
785 sij = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
786 sik = pbc_dx_aiuc(pbc, x[ai], x[ak], dx);
795 if (fshift && (siv != CENTRAL || sij != CENTRAL || sik != CENTRAL))
797 rvec_inc(fshift[siv], f[av]);
798 rvec_dec(fshift[CENTRAL], fi);
799 rvec_dec(fshift[sij], fj);
800 rvec_dec(fshift[sik], fk);
803 /* TOTAL: 20 flops */
806 static void spread_vsite3FD(t_iatom ia[], real a, real b,
807 rvec x[], rvec f[], rvec fshift[],
808 gmx_bool VirCorr, matrix dxdf,
809 t_pbc *pbc, t_graph *g)
811 real fx, fy, fz, c, invl, fproj, a1;
812 rvec xvi, xij, xjk, xix, fv, temp;
813 t_iatom av, ai, aj, ak;
814 int svi, sji, skj, d;
821 copy_rvec(f[av], fv);
823 sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
824 skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
827 /* xix goes from i to point x on the line jk */
828 xix[XX] = xij[XX]+a*xjk[XX];
829 xix[YY] = xij[YY]+a*xjk[YY];
830 xix[ZZ] = xij[ZZ]+a*xjk[ZZ];
833 invl = gmx_invsqrt(iprod(xix, xix));
837 fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
839 temp[XX] = c*(fv[XX]-fproj*xix[XX]);
840 temp[YY] = c*(fv[YY]-fproj*xix[YY]);
841 temp[ZZ] = c*(fv[ZZ]-fproj*xix[ZZ]);
844 /* c is already calculated in constr_vsite3FD
845 storing c somewhere will save 26 flops! */
848 f[ai][XX] += fv[XX] - temp[XX];
849 f[ai][YY] += fv[YY] - temp[YY];
850 f[ai][ZZ] += fv[ZZ] - temp[ZZ];
851 f[aj][XX] += a1*temp[XX];
852 f[aj][YY] += a1*temp[YY];
853 f[aj][ZZ] += a1*temp[ZZ];
854 f[ak][XX] += a*temp[XX];
855 f[ak][YY] += a*temp[YY];
856 f[ak][ZZ] += a*temp[ZZ];
861 ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
863 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
865 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
870 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
877 if (fshift && (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL))
879 rvec_dec(fshift[svi], fv);
880 fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
881 fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
882 fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
883 fshift[ sji][XX] += temp[XX];
884 fshift[ sji][YY] += temp[YY];
885 fshift[ sji][ZZ] += temp[ZZ];
886 fshift[ skj][XX] += a*temp[XX];
887 fshift[ skj][YY] += a*temp[YY];
888 fshift[ skj][ZZ] += a*temp[ZZ];
893 /* When VirCorr=TRUE, the virial for the current forces is not
894 * calculated from the redistributed forces. This means that
895 * the effect of non-linear virtual site constructions on the virial
896 * needs to be added separately. This contribution can be calculated
897 * in many ways, but the simplest and cheapest way is to use
898 * the first constructing atom ai as a reference position in space:
899 * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
904 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
906 for (i = 0; i < DIM; i++)
908 for (j = 0; j < DIM; j++)
910 /* As xix is a linear combination of j and k, use that here */
911 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
916 /* TOTAL: 61 flops */
919 static void spread_vsite3FAD(t_iatom ia[], real a, real b,
920 rvec x[], rvec f[], rvec fshift[],
921 gmx_bool VirCorr, matrix dxdf,
922 t_pbc *pbc, t_graph *g)
924 rvec xvi, xij, xjk, xperp, Fpij, Fppp, fv, f1, f2, f3;
925 real a1, b1, c1, c2, invdij, invdij2, invdp, fproj;
926 t_iatom av, ai, aj, ak;
927 int svi, sji, skj, d;
934 copy_rvec(f[ia[1]], fv);
936 sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
937 skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
940 invdij = gmx_invsqrt(iprod(xij, xij));
941 invdij2 = invdij * invdij;
942 c1 = iprod(xij, xjk) * invdij2;
943 xperp[XX] = xjk[XX] - c1*xij[XX];
944 xperp[YY] = xjk[YY] - c1*xij[YY];
945 xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
946 /* xperp in plane ijk, perp. to ij */
947 invdp = gmx_invsqrt(iprod(xperp, xperp));
952 /* a1, b1 and c1 are already calculated in constr_vsite3FAD
953 storing them somewhere will save 45 flops! */
955 fproj = iprod(xij, fv)*invdij2;
956 svmul(fproj, xij, Fpij); /* proj. f on xij */
957 svmul(iprod(xperp, fv)*invdp*invdp, xperp, Fppp); /* proj. f on xperp */
958 svmul(b1*fproj, xperp, f3);
961 rvec_sub(fv, Fpij, f1); /* f1 = f - Fpij */
962 rvec_sub(f1, Fppp, f2); /* f2 = f - Fpij - Fppp */
963 for (d = 0; (d < DIM); d++)
971 f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
972 f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
973 f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
974 f[aj][XX] += f1[XX] - c2*f2[XX] - f3[XX];
975 f[aj][YY] += f1[YY] - c2*f2[YY] - f3[YY];
976 f[aj][ZZ] += f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
984 ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
986 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
988 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
993 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
1000 if (fshift && (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL))
1002 rvec_dec(fshift[svi], fv);
1003 fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
1004 fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
1005 fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
1006 fshift[ sji][XX] += f1[XX] - c1 *f2[XX] - f3[XX];
1007 fshift[ sji][YY] += f1[YY] - c1 *f2[YY] - f3[YY];
1008 fshift[ sji][ZZ] += f1[ZZ] - c1 *f2[ZZ] - f3[ZZ];
1009 fshift[ skj][XX] += f2[XX];
1010 fshift[ skj][YY] += f2[YY];
1011 fshift[ skj][ZZ] += f2[ZZ];
1019 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
1021 for (i = 0; i < DIM; i++)
1023 for (j = 0; j < DIM; j++)
1025 /* Note that xik=xij+xjk, so we have to add xij*f2 */
1028 + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
1034 /* TOTAL: 113 flops */
1037 static void spread_vsite3OUT(t_iatom ia[], real a, real b, real c,
1038 rvec x[], rvec f[], rvec fshift[],
1039 gmx_bool VirCorr, matrix dxdf,
1040 t_pbc *pbc, t_graph *g)
1042 rvec xvi, xij, xik, fv, fj, fk;
1044 atom_id av, ai, aj, ak;
1053 sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
1054 ski = pbc_rvec_sub(pbc, x[ak], x[ai], xik);
1057 copy_rvec(f[av], fv);
1064 fj[XX] = a*fv[XX] - xik[ZZ]*cfy + xik[YY]*cfz;
1065 fj[YY] = xik[ZZ]*cfx + a*fv[YY] - xik[XX]*cfz;
1066 fj[ZZ] = -xik[YY]*cfx + xik[XX]*cfy + a*fv[ZZ];
1068 fk[XX] = b*fv[XX] + xij[ZZ]*cfy - xij[YY]*cfz;
1069 fk[YY] = -xij[ZZ]*cfx + b*fv[YY] + xij[XX]*cfz;
1070 fk[ZZ] = xij[YY]*cfx - xij[XX]*cfy + b*fv[ZZ];
1073 f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
1074 f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
1075 f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
1076 rvec_inc(f[aj], fj);
1077 rvec_inc(f[ak], fk);
1082 ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
1084 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
1086 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, ai), di);
1091 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
1098 if (fshift && (svi != CENTRAL || sji != CENTRAL || ski != CENTRAL))
1100 rvec_dec(fshift[svi], fv);
1101 fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
1102 fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
1103 fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
1104 rvec_inc(fshift[sji], fj);
1105 rvec_inc(fshift[ski], fk);
1113 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
1115 for (i = 0; i < DIM; i++)
1117 for (j = 0; j < DIM; j++)
1119 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
1124 /* TOTAL: 54 flops */
1127 static void spread_vsite4FD(t_iatom ia[], real a, real b, real c,
1128 rvec x[], rvec f[], rvec fshift[],
1129 gmx_bool VirCorr, matrix dxdf,
1130 t_pbc *pbc, t_graph *g)
1132 real d, invl, fproj, a1;
1133 rvec xvi, xij, xjk, xjl, xix, fv, temp;
1134 atom_id av, ai, aj, ak, al;
1136 int svi, sji, skj, slj, m;
1144 sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
1145 skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
1146 slj = pbc_rvec_sub(pbc, x[al], x[aj], xjl);
1149 /* xix goes from i to point x on the plane jkl */
1150 for (m = 0; m < DIM; m++)
1152 xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
1156 invl = gmx_invsqrt(iprod(xix, xix));
1158 /* 4 + ?10? flops */
1160 copy_rvec(f[av], fv);
1162 fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
1164 for (m = 0; m < DIM; m++)
1166 temp[m] = d*(fv[m] - fproj*xix[m]);
1170 /* c is already calculated in constr_vsite3FD
1171 storing c somewhere will save 35 flops! */
1174 for (m = 0; m < DIM; m++)
1176 f[ai][m] += fv[m] - temp[m];
1177 f[aj][m] += a1*temp[m];
1178 f[ak][m] += a*temp[m];
1179 f[al][m] += b*temp[m];
1185 ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
1187 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
1189 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
1191 ivec_sub(SHIFT_IVEC(g, al), SHIFT_IVEC(g, aj), di);
1196 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
1204 (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL || slj != CENTRAL))
1206 rvec_dec(fshift[svi], fv);
1207 for (m = 0; m < DIM; m++)
1209 fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
1210 fshift[ sji][m] += temp[m];
1211 fshift[ skj][m] += a*temp[m];
1212 fshift[ slj][m] += b*temp[m];
1221 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
1223 for (i = 0; i < DIM; i++)
1225 for (j = 0; j < DIM; j++)
1227 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
1232 /* TOTAL: 77 flops */
1236 static void spread_vsite4FDN(t_iatom ia[], real a, real b, real c,
1237 rvec x[], rvec f[], rvec fshift[],
1238 gmx_bool VirCorr, matrix dxdf,
1239 t_pbc *pbc, t_graph *g)
1241 rvec xvi, xij, xik, xil, ra, rb, rja, rjb, rab, rm, rt;
1242 rvec fv, fj, fk, fl;
1246 int av, ai, aj, ak, al;
1247 int svi, sij, sik, sil;
1249 /* DEBUG: check atom indices */
1256 copy_rvec(f[av], fv);
1258 sij = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
1259 sik = pbc_rvec_sub(pbc, x[ak], x[ai], xik);
1260 sil = pbc_rvec_sub(pbc, x[al], x[ai], xil);
1273 rvec_sub(ra, xij, rja);
1274 rvec_sub(rb, xij, rjb);
1275 rvec_sub(rb, ra, rab);
1278 cprod(rja, rjb, rm);
1281 invrm = gmx_invsqrt(norm2(rm));
1282 denom = invrm*invrm;
1285 cfx = c*invrm*fv[XX];
1286 cfy = c*invrm*fv[YY];
1287 cfz = c*invrm*fv[ZZ];
1298 fj[XX] = ( -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
1299 fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
1300 fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1311 fk[XX] = ( -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
1312 fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
1313 fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1324 fl[XX] = ( -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
1325 fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
1326 fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1329 f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1330 f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1331 f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1332 rvec_inc(f[aj], fj);
1333 rvec_inc(f[ak], fk);
1334 rvec_inc(f[al], fl);
1339 ivec_sub(SHIFT_IVEC(g, av), SHIFT_IVEC(g, ai), di);
1341 ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
1343 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, ai), di);
1345 ivec_sub(SHIFT_IVEC(g, al), SHIFT_IVEC(g, ai), di);
1350 svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
1357 if (fshift && (svi != CENTRAL || sij != CENTRAL || sik != CENTRAL || sil != CENTRAL))
1359 rvec_dec(fshift[svi], fv);
1360 fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1361 fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1362 fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1363 rvec_inc(fshift[sij], fj);
1364 rvec_inc(fshift[sik], fk);
1365 rvec_inc(fshift[sil], fl);
1373 pbc_rvec_sub(pbc, x[av], x[ai], xiv);
1375 for (i = 0; i < DIM; i++)
1377 for (j = 0; j < DIM; j++)
1379 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
1384 /* Total: 207 flops (Yuck!) */
1388 static int spread_vsiten(t_iatom ia[], t_iparams ip[],
1389 rvec x[], rvec f[], rvec fshift[],
1390 t_pbc *pbc, t_graph *g)
1398 n3 = 3*ip[ia[0]].vsiten.n;
1400 copy_rvec(x[av], xv);
1402 for (i = 0; i < n3; i += 3)
1407 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, av), di);
1412 siv = pbc_dx_aiuc(pbc, x[ai], xv, dx);
1418 a = ip[ia[i]].vsiten.a;
1419 svmul(a, f[av], fi);
1420 rvec_inc(f[ai], fi);
1421 if (fshift && siv != CENTRAL)
1423 rvec_inc(fshift[siv], fi);
1424 rvec_dec(fshift[CENTRAL], fi);
1433 static int vsite_count(const t_ilist *ilist, int ftype)
1435 if (ftype == F_VSITEN)
1437 return ilist[ftype].nr/3;
1441 return ilist[ftype].nr/(1 + interaction_function[ftype].nratoms);
1445 static void spread_vsite_f_thread(gmx_vsite_t *vsite,
1446 rvec x[], rvec f[], rvec *fshift,
1447 gmx_bool VirCorr, matrix dxdf,
1448 t_iparams ip[], t_ilist ilist[],
1449 t_graph *g, t_pbc *pbc_null)
1453 int i, inc, m, nra, nr, tp, ftype;
1463 bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
1465 /* this loop goes backwards to be able to build *
1466 * higher type vsites from lower types */
1469 for (ftype = F_NRE-1; (ftype >= 0); ftype--)
1471 if ((interaction_function[ftype].flags & IF_VSITE) &&
1472 ilist[ftype].nr > 0)
1474 nra = interaction_function[ftype].nratoms;
1476 nr = ilist[ftype].nr;
1477 ia = ilist[ftype].iatoms;
1481 pbc_null2 = pbc_null;
1483 else if (pbc_null != NULL)
1485 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
1488 for (i = 0; i < nr; )
1490 if (vsite_pbc != NULL)
1492 if (vsite_pbc[i/(1+nra)] > -2)
1494 pbc_null2 = pbc_null;
1504 /* Constants for constructing */
1505 a1 = ip[tp].vsite.a;
1506 /* Construct the vsite depending on type */
1510 spread_vsite2(ia, a1, x, f, fshift, pbc_null2, g);
1513 b1 = ip[tp].vsite.b;
1514 spread_vsite3(ia, a1, b1, x, f, fshift, pbc_null2, g);
1517 b1 = ip[tp].vsite.b;
1518 spread_vsite3FD(ia, a1, b1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1521 b1 = ip[tp].vsite.b;
1522 spread_vsite3FAD(ia, a1, b1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1525 b1 = ip[tp].vsite.b;
1526 c1 = ip[tp].vsite.c;
1527 spread_vsite3OUT(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1530 b1 = ip[tp].vsite.b;
1531 c1 = ip[tp].vsite.c;
1532 spread_vsite4FD(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1535 b1 = ip[tp].vsite.b;
1536 c1 = ip[tp].vsite.c;
1537 spread_vsite4FDN(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
1540 inc = spread_vsiten(ia, ip, x, f, fshift, pbc_null2, g);
1543 gmx_fatal(FARGS, "No such vsite type %d in %s, line %d",
1544 ftype, __FILE__, __LINE__);
1546 clear_rvec(f[ia[1]]);
1548 /* Increment loop variables */
1556 void spread_vsite_f(FILE *log, gmx_vsite_t *vsite,
1557 rvec x[], rvec f[], rvec *fshift,
1558 gmx_bool VirCorr, matrix vir,
1559 t_nrnb *nrnb, t_idef *idef,
1560 int ePBC, gmx_bool bMolPBC, t_graph *g, matrix box,
1563 t_pbc pbc, *pbc_null;
1566 /* We only need to do pbc when we have inter-cg vsites */
1567 if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite)
1569 /* This is wasting some CPU time as we now do this multiple times
1570 * per MD step. But how often do we have vsites with full pbc?
1572 pbc_null = set_pbc_dd(&pbc, ePBC, cr->dd, FALSE, box);
1579 if (DOMAINDECOMP(cr))
1581 dd_clear_f_vsites(cr->dd, f);
1583 else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
1585 pd_clear_nonlocal_constructs(vsite->vsitecomm, f);
1588 if (vsite->nthreads == 1)
1590 spread_vsite_f_thread(vsite,
1592 VirCorr, vsite->tdata[0].dxdf,
1593 idef->iparams, idef->il,
1598 /* First spread the vsites that might depend on other vsites */
1599 spread_vsite_f_thread(vsite,
1601 VirCorr, vsite->tdata[vsite->nthreads].dxdf,
1603 vsite->tdata[vsite->nthreads].ilist,
1606 #pragma omp parallel num_threads(vsite->nthreads)
1611 thread = gmx_omp_get_thread_num();
1613 if (thread == 0 || fshift == NULL)
1621 fshift_t = vsite->tdata[thread].fshift;
1623 for (i = 0; i < SHIFTS; i++)
1625 clear_rvec(fshift_t[i]);
1629 spread_vsite_f_thread(vsite,
1631 VirCorr, vsite->tdata[thread].dxdf,
1633 vsite->tdata[thread].ilist,
1641 for (th = 1; th < vsite->nthreads; th++)
1643 for (i = 0; i < SHIFTS; i++)
1645 rvec_inc(fshift[i], vsite->tdata[th].fshift[i]);
1655 for (th = 0; th < (vsite->nthreads == 1 ? 1 : vsite->nthreads+1); th++)
1657 for (i = 0; i < DIM; i++)
1659 for (j = 0; j < DIM; j++)
1661 vir[i][j] += -0.5*vsite->tdata[th].dxdf[i][j];
1667 if (DOMAINDECOMP(cr))
1669 dd_move_f_vsites(cr->dd, f, fshift);
1671 else if (vsite->bPDvsitecomm)
1673 /* We only move forces here, and they are independent of shifts */
1674 move_construct_f(vsite->vsitecomm, f, cr);
1677 inc_nrnb(nrnb, eNR_VSITE2, vsite_count(idef->il, F_VSITE2));
1678 inc_nrnb(nrnb, eNR_VSITE3, vsite_count(idef->il, F_VSITE3));
1679 inc_nrnb(nrnb, eNR_VSITE3FD, vsite_count(idef->il, F_VSITE3FD));
1680 inc_nrnb(nrnb, eNR_VSITE3FAD, vsite_count(idef->il, F_VSITE3FAD));
1681 inc_nrnb(nrnb, eNR_VSITE3OUT, vsite_count(idef->il, F_VSITE3OUT));
1682 inc_nrnb(nrnb, eNR_VSITE4FD, vsite_count(idef->il, F_VSITE4FD));
1683 inc_nrnb(nrnb, eNR_VSITE4FDN, vsite_count(idef->il, F_VSITE4FDN));
1684 inc_nrnb(nrnb, eNR_VSITEN, vsite_count(idef->il, F_VSITEN));
1687 static int *atom2cg(t_block *cgs)
1691 snew(a2cg, cgs->index[cgs->nr]);
1692 for (cg = 0; cg < cgs->nr; cg++)
1694 for (i = cgs->index[cg]; i < cgs->index[cg+1]; i++)
/*
 * count_intercg_vsite: count, over the whole topology, the vsites whose
 * constructing atoms are not all inside the vsite's own charge group.
 * Each such vsite in a molecule type is counted once per molecule
 * instance (molb->nmol).  Also reports through *bHaveChargeGroups
 * whether any molecule type actually uses charge groups (i.e. has
 * fewer charge groups than atoms).
 * NOTE(review): this listing elides lines (nral/ia/cg assignments, the
 * loop-break after a match, and the per-moltype sfree(a2cg)) — confirm
 * against the full source.
 */
1703 static int count_intercg_vsite(gmx_mtop_t *mtop,
1704 gmx_bool *bHaveChargeGroups)
1706 int mb, mt, ftype, nral, i, cg, a;
1707 gmx_molblock_t *molb;
1708 gmx_moltype_t *molt;
1712 int n_intercg_vsite;
1714 *bHaveChargeGroups = FALSE;
1716 n_intercg_vsite = 0;
/* loop over all molecule blocks in the system topology */
1717 for (mb = 0; mb < mtop->nmolblock; mb++)
1719 molb = &mtop->molblock[mb];
1720 molt = &mtop->moltype[molb->type];
/* fewer charge groups than atoms => real (multi-atom) charge groups */
1722 if (molt->cgs.nr < molt->atoms.nr)
1724 *bHaveChargeGroups = TRUE;
/* atom -> charge group lookup for this molecule type */
1727 a2cg = atom2cg(&molt->cgs);
1728 for (ftype = 0; ftype < F_NRE; ftype++)
1730 if (interaction_function[ftype].flags & IF_VSITE)
1733 il = &molt->ilist[ftype];
/* iatoms layout per vsite: 1 type entry + nral atom entries */
1735 for (i = 0; i < il->nr; i += 1+nral)
/* compare each constructing atom's cg with the reference cg
 * (cg and ia are set in elided lines — presumably ia points at
 * the current entry and cg is the first atom's cg; verify) */
1738 for (a = 1; a < nral; a++)
1740 if (a2cg[ia[1+a]] != cg)
/* inter-cg vsite: count one per molecule instance */
1742 n_intercg_vsite += molb->nmol;
1752 return n_intercg_vsite;
/*
 * get_vsite_pbc: for every vsite interaction type (F_VSITE2 .. F_VSITEN)
 * build an array with one entry per vsite encoding how PBC must be
 * handled when constructing that vsite:
 *   -2  : vsite and all constructing atoms share one charge group,
 *         no pbc treatment required;
 *   -1  : use full pbc for this vsite;
 *   >=0 : atom index whose pbc status this vsite should follow (this
 *         can be the vsite's own index for the first processed atom of
 *         a vsite-only charge group).
 * Exactly one of atom/md is expected non-NULL to identify which
 * particles are vsites.  Returns a freshly allocated array of arrays,
 * indexed by [ftype - F_VSITE2].
 * NOTE(review): this listing elides lines (braces, the setup of
 * vsi/vsite/cg_v/ia, the pbc_set[] stores, i-increment and the final
 * return) — confirm details against the full source.
 */
1755 static int **get_vsite_pbc(t_iparams *iparams, t_ilist *ilist,
1756 t_atom *atom, t_mdatoms *md,
1757 t_block *cgs, int *a2cg)
1759 int ftype, nral, i, j, vsi, vsite, cg_v, cg_c, a, nc3 = 0;
1762 int **vsite_pbc, *vsite_pbc_f;
1764 gmx_bool bViteOnlyCG_and_FirstAtom;
1766 /* Make an array that tells if the pbc of an atom is set */
1767 snew(pbc_set, cgs->index[cgs->nr]);
1768 /* PBC is set for all non vsites */
1769 for (a = 0; a < cgs->index[cgs->nr]; a++)
/* particle type comes from whichever of atom/md was passed non-NULL */
1771 if ((atom && atom[a].ptype != eptVSite) ||
1772 (md && md->ptype[a] != eptVSite))
/* one per-vsite pbc array slot for each vsite function type */
1778 snew(vsite_pbc, F_VSITEN-F_VSITE2+1);
1780 for (ftype = 0; ftype < F_NRE; ftype++)
1782 if (interaction_function[ftype].flags & IF_VSITE)
/* il->nr/(1+nral) = number of vsites of this function type */
1788 snew(vsite_pbc[ftype-F_VSITE2], il->nr/(1+nral));
1789 vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
1797 /* A value of -2 signals that this vsite and its constructing
1798 * atoms are all within the same cg, so no pbc is required.
1800 vsite_pbc_f[vsi] = -2;
1801 /* Check if constructing atoms are outside the vsite's cg */
1803 if (ftype == F_VSITEN)
/* F_VSITEN: the number of constructing atoms is stored in the
 * interaction parameters; entries come in groups of 3 iatoms */
1805 nc3 = 3*iparams[ia[i]].vsiten.n;
1806 for (j = 0; j < nc3; j += 3)
1808 if (a2cg[ia[i+j+2]] != cg_v)
1810 vsite_pbc_f[vsi] = -1;
/* all other vsite types: nral constructing atoms follow the vsite */
1816 for (a = 1; a < nral; a++)
1818 if (a2cg[ia[i+1+a]] != cg_v)
1820 vsite_pbc_f[vsi] = -1;
/* a constructing atom left the vsite's cg: decide whether we can
 * follow another atom's pbc instead of using full pbc */
1824 if (vsite_pbc_f[vsi] == -1)
1826 /* Check if this is the first processed atom of a vsite only cg */
1827 bViteOnlyCG_and_FirstAtom = TRUE;
1828 for (a = cgs->index[cg_v]; a < cgs->index[cg_v+1]; a++)
1830 /* Non-vsites already have pbc set, so simply check for pbc_set */
1833 bViteOnlyCG_and_FirstAtom = FALSE;
1837 if (bViteOnlyCG_and_FirstAtom)
1839 /* First processed atom of a vsite only charge group.
1840 * The pbc of the input coordinates to construct_vsites
1841 * should be preserved.
1843 vsite_pbc_f[vsi] = vsite;
1845 else if (cg_v != a2cg[ia[1+i+1]])
1847 /* This vsite has a different charge group index
1848 * than its first constructing atom
1849 * and the charge group has more than one atom,
1850 * search for the first normal particle
1851 * or vsite that already had its pbc defined.
1852 * If nothing is found, use full pbc for this vsite.
1854 for (a = cgs->index[cg_v]; a < cgs->index[cg_v+1]; a++)
1856 if (a != vsite && pbc_set[a])
1858 vsite_pbc_f[vsi] = a;
1861 fprintf(debug, "vsite %d match pbc with atom %d\n",
1869 fprintf(debug, "vsite atom %d cg %d - %d pbc atom %d\n",
1870 vsite+1, cgs->index[cg_v]+1, cgs->index[cg_v+1],
1871 vsite_pbc_f[vsi]+1);
1875 if (ftype == F_VSITEN)
1877 /* The other entries in vsite_pbc_f are not used for center vsites */
1885 /* This vsite now has its pbc defined */
/*
 * init_vsite: allocate and initialize the global vsite bookkeeping
 * struct.  Counts the vsites in the topology, counts inter-charge-group
 * vsites, builds per-moleculetype pbc tables when charge groups and
 * domain decomposition are in use, and sets up the per-thread work
 * structures for OpenMP vsite construction/spreading.
 * NOTE(review): the allocation of the returned struct, the early return
 * for nvsite==0, the sfree(a2cg) and the final return are in elided
 * lines of this listing — confirm against the full source.
 */
1897 gmx_vsite_t *init_vsite(gmx_mtop_t *mtop, t_commrec *cr,
1898 gmx_bool bSerial_NoPBC)
1904 gmx_moltype_t *molt;
1907 /* check if there are vsites */
1909 for (i = 0; i < F_NRE; i++)
1911 if (interaction_function[i].flags & IF_VSITE)
1913 nvsite += gmx_mtop_ftype_count(mtop, i);
/* how many vsites have constructing atoms outside their own charge
 * group, and whether the topology uses charge groups at all */
1924 vsite->n_intercg_vsite = count_intercg_vsite(mtop,
1925 &vsite->bHaveChargeGroups);
1927 /* If we don't have charge groups, the vsite follows its own pbc */
1928 if (!bSerial_NoPBC &&
1929 vsite->bHaveChargeGroups &&
1930 vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr))
/* charge groups + DD: precompute a pbc table per molecule type */
1932 vsite->nvsite_pbc_molt = mtop->nmoltype;
1933 snew(vsite->vsite_pbc_molt, vsite->nvsite_pbc_molt);
1934 for (mt = 0; mt < mtop->nmoltype; mt++)
1936 molt = &mtop->moltype[mt];
1937 /* Make an atom to charge group index */
1938 a2cg = atom2cg(&molt->cgs);
/* t_atom array supplied, mdatoms NULL: global topology variant */
1939 vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
1941 molt->atoms.atom, NULL,
/* local (per-domain) pbc arrays; filled later (see set_vsite_top) */
1946 snew(vsite->vsite_pbc_loc_nalloc, F_VSITEN-F_VSITE2+1);
1947 snew(vsite->vsite_pbc_loc, F_VSITEN-F_VSITE2+1);
/* default to serial; otherwise ask the OpenMP thread module */
1952 vsite->nthreads = 1;
1956 vsite->nthreads = gmx_omp_nthreads_get(emntVSITE);
1960 /* We need one extra thread data structure for the overlap vsites */
1961 snew(vsite->tdata, vsite->nthreads+1);
/* thread-index array is allocated on demand in split_vsites_over_threads */
1964 vsite->th_ind = NULL;
1965 vsite->th_ind_nalloc = 0;
/*
 * prepare_vsite_thread: ensure the per-thread ilist copies in vsite_th
 * can hold all vsite interactions present in ilist, growing the iatoms
 * buffers when needed, and reset their entry counts to zero so the
 * distribution pass can append entries from scratch.
 */
1970 static void prepare_vsite_thread(const t_ilist *ilist,
1971 gmx_vsite_thread_t *vsite_th)
1975 for (ftype = 0; ftype < F_NRE; ftype++)
1977 if (interaction_function[ftype].flags & IF_VSITE)
/* grow with over-allocation to limit the number of reallocations */
1979 if (ilist[ftype].nr > vsite_th->ilist[ftype].nalloc)
1981 vsite_th->ilist[ftype].nalloc = over_alloc_large(ilist[ftype].nr);
1982 srenew(vsite_th->ilist[ftype].iatoms, vsite_th->ilist[ftype].nalloc);
/* start empty; entries are copied in by split_vsites_over_threads */
1985 vsite_th->ilist[ftype].nr = 0;
/*
 * split_vsites_over_threads: distribute the vsite interactions in ilist
 * over the OpenMP threads.  Atoms are partitioned into uniform index
 * ranges of natperthread; a vsite whose constructing atoms (or
 * constructing vsites) all fall in one thread's range is assigned to
 * that thread, otherwise it goes into the extra "overlap" batch
 * (index vsite->nthreads) that the master thread handles serially.
 * NOTE(review): this listing elides lines (braces, the th_ind stores,
 * the per-type 'inc' setup for F_VSITEN, the i advance, and the early
 * return for one thread) — confirm against the full source.
 */
1990 void split_vsites_over_threads(const t_ilist *ilist,
1991 const t_mdatoms *mdatoms,
1992 gmx_bool bLimitRange,
1996 int vsite_atom_range, natperthread;
2001 int nral1, inc, i, j;
/* nothing to distribute with a single thread */
2003 if (vsite->nthreads == 1)
/* size/reset each thread's private ilist copies in parallel */
2009 #pragma omp parallel for num_threads(vsite->nthreads) schedule(static)
2010 for (th = 0; th < vsite->nthreads; th++)
2012 prepare_vsite_thread(ilist, &vsite->tdata[th]);
2014 /* Master thread does the (potential) overlap vsites */
2015 prepare_vsite_thread(ilist, &vsite->tdata[vsite->nthreads]);
2017 /* The current way of distributing the vsites over threads is primitive.
2018 * We divide the atom range 0 - natoms_in_vsite uniformly over threads,
2019 * without taking into account how the vsites are distributed.
2020 * Without domain decomposition we have bLimitRange=TRUE and we at least
2021 * tighten the upper bound of the range (useful for common systems
2022 * such as a vsite-protein in 3-site water).
/* find the highest atom index referenced by any vsite interaction */
2026 vsite_atom_range = -1;
2027 for (ftype = 0; ftype < F_NRE; ftype++)
2029 if ((interaction_function[ftype].flags & IF_VSITE) &&
2032 nral1 = 1 + NRAL(ftype);
2033 iat = ilist[ftype].iatoms;
2034 for (i = 0; i < ilist[ftype].nr; i += nral1)
2036 for (j = i+1; j < i+nral1; j++)
2038 vsite_atom_range = max(vsite_atom_range, iat[j]);
/* without range limiting, simply cover all home atoms */
2047 vsite_atom_range = mdatoms->homenr;
/* ceiling division: uniform atom count per thread */
2049 natperthread = (vsite_atom_range + vsite->nthreads - 1)/vsite->nthreads;
2053 fprintf(debug, "virtual site thread dist: natoms %d, range %d, natperthread %d\n", mdatoms->nr, vsite_atom_range, natperthread);
2056 /* To simplify the vsite assignment, we make an index which tells us
2057 * to which thread particles, both non-vsites and vsites, are assigned.
2059 if (mdatoms->nr > vsite->th_ind_nalloc)
2061 vsite->th_ind_nalloc = over_alloc_large(mdatoms->nr);
2062 srenew(vsite->th_ind, vsite->th_ind_nalloc);
2064 th_ind = vsite->th_ind;
2066 for (i = 0; i < mdatoms->nr; i++)
2068 if (mdatoms->ptype[i] == eptVSite)
2070 /* vsites are not assigned to a thread yet */
2075 /* assign non-vsite particles to thread th */
/* advance to the next thread at each range boundary */
2078 if (i == (th + 1)*natperthread && th < vsite->nthreads)
2084 for (ftype = 0; ftype < F_NRE; ftype++)
2086 if ((interaction_function[ftype].flags & IF_VSITE) &&
2089 nral1 = 1 + NRAL(ftype);
2091 iat = ilist[ftype].iatoms;
2092 for (i = 0; i < ilist[ftype].nr; )
/* tentative thread: range containing the vsite's own atom index */
2094 th = iat[1+i]/natperthread;
2095 /* We would like to assign this vsite to thread th,
2096 * but it might depend on atoms outside the atom range of th
2097 * or on another vsite not assigned to thread th.
2099 if (ftype != F_VSITEN)
2101 for (j = i+2; j < i+nral1; j++)
2103 if (th_ind[iat[j]] != th)
2105 /* Some constructing atoms are not assigned to
2106 * thread th, move this vsite to a separate batch.
2108 th = vsite->nthreads;
/* F_VSITEN: entries come in groups of 3; 'inc' is set in an
 * elided line — presumably from the vsiten parameters; verify */
2115 for (j = i+2; j < i+inc; j += 3)
2117 if (th_ind[iat[j]] != th)
2119 th = vsite->nthreads;
2123 /* Copy this vsite to the thread data struct of thread th */
2124 il_th = &vsite->tdata[th].ilist[ftype];
2125 for (j = i; j < i+inc; j++)
2127 il_th->iatoms[il_th->nr++] = iat[j];
2129 /* Update this vsite's thread index entry */
2130 th_ind[iat[1+i]] = th;
/* debug report: per-type distribution over threads (+ overlap batch) */
2139 for (ftype = 0; ftype < F_NRE; ftype++)
2141 if ((interaction_function[ftype].flags & IF_VSITE) &&
2142 ilist[ftype].nr > 0)
2144 fprintf(debug, "%-20s thread dist:",
2145 interaction_function[ftype].longname);
2146 for (th = 0; th < vsite->nthreads+1; th++)
2148 fprintf(debug, " %4d", vsite->tdata[th].ilist[ftype].nr);
2150 fprintf(debug, "\n");
2156 void set_vsite_top(gmx_vsite_t *vsite, gmx_localtop_t *top, t_mdatoms *md,
2161 if (vsite->n_intercg_vsite > 0)
2163 if (vsite->bHaveChargeGroups)
2165 /* Make an atom to charge group index */
2166 a2cg = atom2cg(&top->cgs);
2167 vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
2168 top->idef.il, NULL, md,
2175 snew(vsite->vsitecomm, 1);
2176 vsite->bPDvsitecomm =
2177 setup_parallel_vsites(&(top->idef), cr, vsite->vsitecomm);
2181 if (vsite->nthreads > 1)
2183 if (vsite->bHaveChargeGroups || PARTDECOMP(cr))
2185 gmx_incons("Can not use threads virtual sites combined with charge groups or particle decomposition");
2188 split_vsites_over_threads(top->idef.il, md, !DOMAINDECOMP(cr), vsite);