2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
55 #include "mtop_util.h"
56 #include "gmx_omp_nthreads.h"
59 /* Routines to send/receive coordinates and forces
60 * of constructing atoms.
63 static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
69 sendbuf = vsitecomm->send_buf;
70 recvbuf = vsitecomm->recv_buf;
73 /* Prepare pulse left by copying to send buffer */
74 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
76 ia = vsitecomm->left_export_construct[i];
77 copy_rvec(x[ia],sendbuf[i]);
80 /* Pulse coordinates left */
81 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_export_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_import_nconstruct);
83 /* Copy from receive buffer to coordinate array */
84 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
86 ia = vsitecomm->right_import_construct[i];
87 copy_rvec(recvbuf[i],x[ia]);
90 /* Prepare pulse right by copying to send buffer */
91 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
93 ia = vsitecomm->right_export_construct[i];
94 copy_rvec(x[ia],sendbuf[i]);
97 /* Pulse coordinates right */
98 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_export_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_import_nconstruct);
100 /* Copy from receive buffer to coordinate array */
101 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
103 ia = vsitecomm->left_import_construct[i];
104 copy_rvec(recvbuf[i],x[ia]);
109 static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
115 sendbuf = vsitecomm->send_buf;
116 recvbuf = vsitecomm->recv_buf;
118 /* Prepare pulse right by copying to send buffer */
119 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
121 ia = vsitecomm->right_import_construct[i];
122 copy_rvec(f[ia],sendbuf[i]);
123 clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
126 /* Pulse forces right */
127 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_import_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_export_nconstruct);
129 /* Copy from receive buffer to coordinate array */
130 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
132 ia = vsitecomm->left_export_construct[i];
133 rvec_inc(f[ia],recvbuf[i]);
136 /* Prepare pulse left by copying to send buffer */
137 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
139 ia = vsitecomm->left_import_construct[i];
140 copy_rvec(f[ia],sendbuf[i]);
141 clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
144 /* Pulse coordinates left */
145 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_import_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_export_nconstruct);
147 /* Copy from receive buffer to coordinate array */
148 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
150 ia = vsitecomm->right_export_construct[i];
151 rvec_inc(f[ia],recvbuf[i]);
154 /* All forces are now on the home processors */
159 pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
163 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
165 ia = vsitecomm->left_import_construct[i];
168 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
170 ia = vsitecomm->right_import_construct[i];
177 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
180 return pbc_dx_aiuc(pbc,xi,xj,dx);
188 /* Vsite construction routines */
190 static void constr_vsite2(rvec xi,rvec xj,rvec x,real a,t_pbc *pbc)
199 pbc_dx_aiuc(pbc,xj,xi,dx);
200 x[XX] = xi[XX] + a*dx[XX];
201 x[YY] = xi[YY] + a*dx[YY];
202 x[ZZ] = xi[ZZ] + a*dx[ZZ];
204 x[XX] = b*xi[XX] + a*xj[XX];
205 x[YY] = b*xi[YY] + a*xj[YY];
206 x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
210 /* TOTAL: 10 flops */
213 static void constr_vsite3(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
223 pbc_dx_aiuc(pbc,xj,xi,dxj);
224 pbc_dx_aiuc(pbc,xk,xi,dxk);
225 x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
226 x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
227 x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
229 x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
230 x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
231 x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
235 /* TOTAL: 17 flops */
238 static void constr_vsite3FD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
244 pbc_rvec_sub(pbc,xj,xi,xij);
245 pbc_rvec_sub(pbc,xk,xj,xjk);
248 /* temp goes from i to a point on the line jk */
249 temp[XX] = xij[XX] + a*xjk[XX];
250 temp[YY] = xij[YY] + a*xjk[YY];
251 temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
254 c=b*gmx_invsqrt(iprod(temp,temp));
257 x[XX] = xi[XX] + c*temp[XX];
258 x[YY] = xi[YY] + c*temp[YY];
259 x[ZZ] = xi[ZZ] + c*temp[ZZ];
262 /* TOTAL: 34 flops */
265 static void constr_vsite3FAD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b, t_pbc *pbc)
268 real a1,b1,c1,invdij;
270 pbc_rvec_sub(pbc,xj,xi,xij);
271 pbc_rvec_sub(pbc,xk,xj,xjk);
274 invdij = gmx_invsqrt(iprod(xij,xij));
275 c1 = invdij * invdij * iprod(xij,xjk);
276 xp[XX] = xjk[XX] - c1*xij[XX];
277 xp[YY] = xjk[YY] - c1*xij[YY];
278 xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
280 b1 = b*gmx_invsqrt(iprod(xp,xp));
283 x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
284 x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
285 x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
288 /* TOTAL: 63 flops */
291 static void constr_vsite3OUT(rvec xi,rvec xj,rvec xk,rvec x,
292 real a,real b,real c,t_pbc *pbc)
296 pbc_rvec_sub(pbc,xj,xi,xij);
297 pbc_rvec_sub(pbc,xk,xi,xik);
301 x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
302 x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
303 x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
306 /* TOTAL: 33 flops */
309 static void constr_vsite4FD(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
310 real a,real b,real c,t_pbc *pbc)
312 rvec xij,xjk,xjl,temp;
315 pbc_rvec_sub(pbc,xj,xi,xij);
316 pbc_rvec_sub(pbc,xk,xj,xjk);
317 pbc_rvec_sub(pbc,xl,xj,xjl);
320 /* temp goes from i to a point on the plane jkl */
321 temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
322 temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
323 temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
326 d=c*gmx_invsqrt(iprod(temp,temp));
329 x[XX] = xi[XX] + d*temp[XX];
330 x[YY] = xi[YY] + d*temp[YY];
331 x[ZZ] = xi[ZZ] + d*temp[ZZ];
334 /* TOTAL: 43 flops */
338 static void constr_vsite4FDN(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
339 real a,real b,real c,t_pbc *pbc)
341 rvec xij,xik,xil,ra,rb,rja,rjb,rm;
344 pbc_rvec_sub(pbc,xj,xi,xij);
345 pbc_rvec_sub(pbc,xk,xi,xik);
346 pbc_rvec_sub(pbc,xl,xi,xil);
359 rvec_sub(ra,xij,rja);
360 rvec_sub(rb,xij,rjb);
366 d=c*gmx_invsqrt(norm2(rm));
369 x[XX] = xi[XX] + d*rm[XX];
370 x[YY] = xi[YY] + d*rm[YY];
371 x[ZZ] = xi[ZZ] + d*rm[ZZ];
374 /* TOTAL: 47 flops */
378 static int constr_vsiten(t_iatom *ia, t_iparams ip[],
386 n3 = 3*ip[ia[0]].vsiten.n;
391 for(i=3; i<n3; i+=3) {
393 a = ip[ia[i]].vsiten.a;
395 pbc_dx_aiuc(pbc,x[ai],x1,dx);
397 rvec_sub(x[ai],x1,dx);
399 dsum[XX] += a*dx[XX];
400 dsum[YY] += a*dx[YY];
401 dsum[ZZ] += a*dx[ZZ];
405 x[av][XX] = x1[XX] + dsum[XX];
406 x[av][YY] = x1[YY] + dsum[YY];
407 x[av][ZZ] = x1[ZZ] + dsum[ZZ];
413 void construct_vsites_thread(gmx_vsite_t *vsite,
414 rvec x[],t_nrnb *nrnb,
416 t_iparams ip[],t_ilist ilist[],
421 real a1,b1,c1,inv_dt;
422 int i,inc,ii,nra,nr,tp,ftype;
423 t_iatom avsite,ai,aj,ak,al,pbc_atom;
426 int *vsite_pbc,ishift;
427 rvec reftmp,vtmp,rtmp;
438 bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
442 for(ftype=0; (ftype<F_NRE); ftype++)
444 if ((interaction_function[ftype].flags & IF_VSITE) &&
447 nra = interaction_function[ftype].nratoms;
449 nr = ilist[ftype].nr;
450 ia = ilist[ftype].iatoms;
454 pbc_null2 = pbc_null;
456 else if (pbc_null != NULL)
458 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
465 /* The vsite and constructing atoms */
470 /* Constants for constructing vsites */
472 /* Check what kind of pbc we need to use */
475 /* No charge groups, vsite follows its own pbc */
477 copy_rvec(x[avsite],xpbc);
479 else if (vsite_pbc != NULL)
481 pbc_atom = vsite_pbc[i/(1+nra)];
486 /* We need to copy the coordinates here,
487 * single for single atom cg's pbc_atom
488 * is the vsite itself.
490 copy_rvec(x[pbc_atom],xpbc);
492 pbc_null2 = pbc_null;
503 /* Copy the old position */
504 copy_rvec(x[avsite],xv);
506 /* Construct the vsite depending on type */
510 constr_vsite2(x[ai],x[aj],x[avsite],a1,pbc_null2);
515 constr_vsite3(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
520 constr_vsite3FD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
525 constr_vsite3FAD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
531 constr_vsite3OUT(x[ai],x[aj],x[ak],x[avsite],a1,b1,c1,pbc_null2);
538 constr_vsite4FD(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
546 constr_vsite4FDN(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
550 inc = constr_vsiten(ia,ip,x,pbc_null2);
553 gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
554 ftype,__FILE__,__LINE__);
559 /* Match the pbc of this vsite to the rest of its charge group */
560 ishift = pbc_dx_aiuc(pbc_null,x[avsite],xpbc,dx);
561 if (ishift != CENTRAL)
563 rvec_add(xpbc,dx,x[avsite]);
568 /* Calculate velocity of vsite... */
569 rvec_sub(x[avsite],xv,vv);
570 svmul(inv_dt,vv,v[avsite]);
573 /* Increment loop variables */
581 void construct_vsites(FILE *log,gmx_vsite_t *vsite,
582 rvec x[],t_nrnb *nrnb,
584 t_iparams ip[],t_ilist ilist[],
585 int ePBC,gmx_bool bMolPBC,t_graph *graph,
586 t_commrec *cr,matrix box)
592 bDomDec = cr && DOMAINDECOMP(cr);
594 /* We only need to do pbc when we have inter-cg vsites */
595 if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite)
597 /* This is wasting some CPU time as we now do this multiple times
598 * per MD step. But how often do we have vsites with full pbc?
600 pbc_null = set_pbc_dd(&pbc,ePBC,cr!=NULL ? cr->dd : NULL,FALSE,box);
611 dd_move_x_vsites(cr->dd,box,x);
613 else if (vsite->bPDvsitecomm)
615 /* I'm not sure whether the periodicity and shift are guaranteed
616 * to be consistent between different nodes when running e.g. polymers
617 * in parallel. In this special case we thus unshift/shift,
618 * but only when necessary. This is to make sure the coordinates
619 * we move don't end up a box away...
623 unshift_self(graph,box,x);
626 move_construct_x(vsite->vsitecomm,x,cr);
630 shift_self(graph,box,x);
635 if (vsite->nthreads == 1)
637 construct_vsites_thread(vsite,
644 #pragma omp parallel num_threads(vsite->nthreads)
646 construct_vsites_thread(vsite,
648 ip,vsite->tdata[gmx_omp_get_thread_num()].ilist,
651 /* Now we can construct the vsites that might depend on other vsites */
652 construct_vsites_thread(vsite,
654 ip,vsite->tdata[vsite->nthreads].ilist,
659 static void spread_vsite2(t_iatom ia[],real a,
660 rvec x[],rvec f[],rvec fshift[],
661 t_pbc *pbc,t_graph *g)
682 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
684 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
687 siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
688 sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
694 if (fshift && (siv != CENTRAL || sij != CENTRAL)) {
695 rvec_inc(fshift[siv],f[av]);
696 rvec_dec(fshift[CENTRAL],fi);
697 rvec_dec(fshift[sij],fj);
700 /* TOTAL: 13 flops */
703 void construct_vsites_mtop(FILE *log,gmx_vsite_t *vsite,
704 gmx_mtop_t *mtop,rvec x[])
707 gmx_molblock_t *molb;
711 for(mb=0; mb<mtop->nmolblock; mb++) {
712 molb = &mtop->molblock[mb];
713 molt = &mtop->moltype[molb->type];
714 for(mol=0; mol<molb->nmol; mol++) {
715 construct_vsites(log,vsite,x+as,NULL,0.0,NULL,
716 mtop->ffparams.iparams,molt->ilist,
717 epbcNONE,TRUE,NULL,NULL,NULL);
718 as += molt->atoms.nr;
723 static void spread_vsite3(t_iatom ia[],real a,real b,
724 rvec x[],rvec f[],rvec fshift[],
725 t_pbc *pbc,t_graph *g)
737 svmul(1-a-b,f[av],fi);
748 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ia[1]),di);
750 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
752 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),di);
755 siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
756 sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
757 sik = pbc_dx_aiuc(pbc,x[ai],x[ak],dx);
764 if (fshift && (siv!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL)) {
765 rvec_inc(fshift[siv],f[av]);
766 rvec_dec(fshift[CENTRAL],fi);
767 rvec_dec(fshift[sij],fj);
768 rvec_dec(fshift[sik],fk);
771 /* TOTAL: 20 flops */
774 static void spread_vsite3FD(t_iatom ia[],real a,real b,
775 rvec x[],rvec f[],rvec fshift[],
776 gmx_bool VirCorr,matrix dxdf,
777 t_pbc *pbc,t_graph *g)
779 real fx,fy,fz,c,invl,fproj,a1;
780 rvec xvi,xij,xjk,xix,fv,temp;
791 sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
792 skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
795 /* xix goes from i to point x on the line jk */
796 xix[XX]=xij[XX]+a*xjk[XX];
797 xix[YY]=xij[YY]+a*xjk[YY];
798 xix[ZZ]=xij[ZZ]+a*xjk[ZZ];
801 invl=gmx_invsqrt(iprod(xix,xix));
805 fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
807 temp[XX]=c*(fv[XX]-fproj*xix[XX]);
808 temp[YY]=c*(fv[YY]-fproj*xix[YY]);
809 temp[ZZ]=c*(fv[ZZ]-fproj*xix[ZZ]);
812 /* c is already calculated in constr_vsite3FD
813 storing c somewhere will save 26 flops! */
816 f[ai][XX] += fv[XX] - temp[XX];
817 f[ai][YY] += fv[YY] - temp[YY];
818 f[ai][ZZ] += fv[ZZ] - temp[ZZ];
819 f[aj][XX] += a1*temp[XX];
820 f[aj][YY] += a1*temp[YY];
821 f[aj][ZZ] += a1*temp[ZZ];
822 f[ak][XX] += a*temp[XX];
823 f[ak][YY] += a*temp[YY];
824 f[ak][ZZ] += a*temp[ZZ];
828 ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
830 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
832 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
835 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
840 if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
841 rvec_dec(fshift[svi],fv);
842 fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
843 fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
844 fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
845 fshift[ sji][XX] += temp[XX];
846 fshift[ sji][YY] += temp[YY];
847 fshift[ sji][ZZ] += temp[ZZ];
848 fshift[ skj][XX] += a*temp[XX];
849 fshift[ skj][YY] += a*temp[YY];
850 fshift[ skj][ZZ] += a*temp[ZZ];
855 /* When VirCorr=TRUE, the virial for the current forces is not
856 * calculated from the redistributed forces. This means that
857 * the effect of non-linear virtual site constructions on the virial
858 * needs to be added separately. This contribution can be calculated
859 * in many ways, but the simplest and cheapest way is to use
860 * the first constructing atom ai as a reference position in space:
861 * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
866 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
872 /* As xix is a linear combination of j and k, use that here */
873 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
878 /* TOTAL: 61 flops */
881 static void spread_vsite3FAD(t_iatom ia[],real a,real b,
882 rvec x[],rvec f[],rvec fshift[],
883 gmx_bool VirCorr,matrix dxdf,
884 t_pbc *pbc,t_graph *g)
886 rvec xvi,xij,xjk,xperp,Fpij,Fppp,fv,f1,f2,f3;
887 real a1,b1,c1,c2,invdij,invdij2,invdp,fproj;
896 copy_rvec(f[ia[1]],fv);
898 sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
899 skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
902 invdij = gmx_invsqrt(iprod(xij,xij));
903 invdij2 = invdij * invdij;
904 c1 = iprod(xij,xjk) * invdij2;
905 xperp[XX] = xjk[XX] - c1*xij[XX];
906 xperp[YY] = xjk[YY] - c1*xij[YY];
907 xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
908 /* xperp in plane ijk, perp. to ij */
909 invdp = gmx_invsqrt(iprod(xperp,xperp));
914 /* a1, b1 and c1 are already calculated in constr_vsite3FAD
915 storing them somewhere will save 45 flops! */
917 fproj=iprod(xij ,fv)*invdij2;
918 svmul(fproj, xij, Fpij); /* proj. f on xij */
919 svmul(iprod(xperp,fv)*invdp*invdp,xperp,Fppp); /* proj. f on xperp */
920 svmul(b1*fproj, xperp,f3);
923 rvec_sub(fv,Fpij,f1); /* f1 = f - Fpij */
924 rvec_sub(f1,Fppp,f2); /* f2 = f - Fpij - Fppp */
925 for (d=0; (d<DIM); d++) {
932 f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
933 f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
934 f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
935 f[aj][XX] += f1[XX] - c2*f2[XX] - f3[XX];
936 f[aj][YY] += f1[YY] - c2*f2[YY] - f3[YY];
937 f[aj][ZZ] += f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
944 ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
946 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
948 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
951 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
956 if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
957 rvec_dec(fshift[svi],fv);
958 fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
959 fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
960 fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
961 fshift[ sji][XX] += f1[XX] - c1 *f2[XX] - f3[XX];
962 fshift[ sji][YY] += f1[YY] - c1 *f2[YY] - f3[YY];
963 fshift[ sji][ZZ] += f1[ZZ] - c1 *f2[ZZ] - f3[ZZ];
964 fshift[ skj][XX] += f2[XX];
965 fshift[ skj][YY] += f2[YY];
966 fshift[ skj][ZZ] += f2[ZZ];
974 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
980 /* Note that xik=xij+xjk, so we have to add xij*f2 */
983 + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
989 /* TOTAL: 113 flops */
992 static void spread_vsite3OUT(t_iatom ia[],real a,real b,real c,
993 rvec x[],rvec f[],rvec fshift[],
994 gmx_bool VirCorr,matrix dxdf,
995 t_pbc *pbc,t_graph *g)
997 rvec xvi,xij,xik,fv,fj,fk;
1008 sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1009 ski = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1012 copy_rvec(f[av],fv);
1019 fj[XX] = a*fv[XX] - xik[ZZ]*cfy + xik[YY]*cfz;
1020 fj[YY] = xik[ZZ]*cfx + a*fv[YY] - xik[XX]*cfz;
1021 fj[ZZ] = -xik[YY]*cfx + xik[XX]*cfy + a*fv[ZZ];
1023 fk[XX] = b*fv[XX] + xij[ZZ]*cfy - xij[YY]*cfz;
1024 fk[YY] = -xij[ZZ]*cfx + b*fv[YY] + xij[XX]*cfz;
1025 fk[ZZ] = xij[YY]*cfx - xij[XX]*cfy + b*fv[ZZ];
1028 f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
1029 f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
1030 f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
1036 ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
1038 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1040 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1043 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1048 if (fshift && (svi!=CENTRAL || sji!=CENTRAL || ski!=CENTRAL)) {
1049 rvec_dec(fshift[svi],fv);
1050 fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
1051 fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
1052 fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
1053 rvec_inc(fshift[sji],fj);
1054 rvec_inc(fshift[ski],fk);
1062 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1064 for(i=0; i<DIM; i++)
1066 for(j=0; j<DIM; j++)
1068 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
1073 /* TOTAL: 54 flops */
1076 static void spread_vsite4FD(t_iatom ia[],real a,real b,real c,
1077 rvec x[],rvec f[],rvec fshift[],
1078 gmx_bool VirCorr,matrix dxdf,
1079 t_pbc *pbc,t_graph *g)
1081 real d,invl,fproj,a1;
1082 rvec xvi,xij,xjk,xjl,xix,fv,temp;
1083 atom_id av,ai,aj,ak,al;
1085 int svi,sji,skj,slj,m;
1093 sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1094 skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
1095 slj = pbc_rvec_sub(pbc,x[al],x[aj],xjl);
1098 /* xix goes from i to point x on the plane jkl */
1099 for(m=0; m<DIM; m++)
1100 xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
1103 invl=gmx_invsqrt(iprod(xix,xix));
1105 /* 4 + ?10? flops */
1107 copy_rvec(f[av],fv);
1109 fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
1111 for(m=0; m<DIM; m++)
1112 temp[m] = d*(fv[m] - fproj*xix[m]);
1115 /* c is already calculated in constr_vsite3FD
1116 storing c somewhere will save 35 flops! */
1119 for(m=0; m<DIM; m++) {
1120 f[ai][m] += fv[m] - temp[m];
1121 f[aj][m] += a1*temp[m];
1122 f[ak][m] += a*temp[m];
1123 f[al][m] += b*temp[m];
1128 ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
1130 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1132 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
1134 ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,aj),di);
1137 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1143 (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL || slj!=CENTRAL)) {
1144 rvec_dec(fshift[svi],fv);
1145 for(m=0; m<DIM; m++) {
1146 fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
1147 fshift[ sji][m] += temp[m];
1148 fshift[ skj][m] += a*temp[m];
1149 fshift[ slj][m] += b*temp[m];
1158 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1160 for(i=0; i<DIM; i++)
1162 for(j=0; j<DIM; j++)
1164 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
1169 /* TOTAL: 77 flops */
1173 static void spread_vsite4FDN(t_iatom ia[],real a,real b,real c,
1174 rvec x[],rvec f[],rvec fshift[],
1175 gmx_bool VirCorr,matrix dxdf,
1176 t_pbc *pbc,t_graph *g)
1178 rvec xvi,xij,xik,xil,ra,rb,rja,rjb,rab,rm,rt;
1184 int svi,sij,sik,sil;
1186 /* DEBUG: check atom indices */
1193 copy_rvec(f[av],fv);
1195 sij = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1196 sik = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1197 sil = pbc_rvec_sub(pbc,x[al],x[ai],xil);
1210 rvec_sub(ra,xij,rja);
1211 rvec_sub(rb,xij,rjb);
1212 rvec_sub(rb,ra,rab);
1218 invrm=gmx_invsqrt(norm2(rm));
1222 cfx = c*invrm*fv[XX];
1223 cfy = c*invrm*fv[YY];
1224 cfz = c*invrm*fv[ZZ];
1235 fj[XX] = ( -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
1236 fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
1237 fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1248 fk[XX] = ( -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
1249 fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
1250 fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1261 fl[XX] = ( -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
1262 fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
1263 fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1266 f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1267 f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1268 f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1275 ivec_sub(SHIFT_IVEC(g,av),SHIFT_IVEC(g,ai),di);
1277 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1279 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1281 ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,ai),di);
1284 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1289 if (fshift && (svi!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL || sil!=CENTRAL)) {
1290 rvec_dec(fshift[svi],fv);
1291 fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1292 fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1293 fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1294 rvec_inc(fshift[sij],fj);
1295 rvec_inc(fshift[sik],fk);
1296 rvec_inc(fshift[sil],fl);
1304 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1306 for(i=0; i<DIM; i++)
1308 for(j=0; j<DIM; j++)
1310 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
1315 /* Total: 207 flops (Yuck!) */
1319 static int spread_vsiten(t_iatom ia[],t_iparams ip[],
1320 rvec x[],rvec f[],rvec fshift[],
1321 t_pbc *pbc,t_graph *g)
1329 n3 = 3*ip[ia[0]].vsiten.n;
1331 copy_rvec(x[av],xv);
1333 for(i=0; i<n3; i+=3) {
1336 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
1339 siv = pbc_dx_aiuc(pbc,x[ai],xv,dx);
1343 a = ip[ia[i]].vsiten.a;
1346 if (fshift && siv != CENTRAL) {
1347 rvec_inc(fshift[siv],fi);
1348 rvec_dec(fshift[CENTRAL],fi);
1357 static int vsite_count(const t_ilist *ilist,int ftype)
1359 if (ftype == F_VSITEN)
1361 return ilist[ftype].nr/3;
1365 return ilist[ftype].nr/(1 + interaction_function[ftype].nratoms);
1369 static void spread_vsite_f_thread(gmx_vsite_t *vsite,
1370 rvec x[],rvec f[],rvec *fshift,
1371 gmx_bool VirCorr,matrix dxdf,
1372 t_iparams ip[],t_ilist ilist[],
1373 t_graph *g,t_pbc *pbc_null)
1377 int i,inc,m,nra,nr,tp,ftype;
1387 bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
1389 /* this loop goes backwards to be able to build *
1390 * higher type vsites from lower types */
1393 for(ftype=F_NRE-1; (ftype>=0); ftype--)
1395 if ((interaction_function[ftype].flags & IF_VSITE) &&
1396 ilist[ftype].nr > 0)
1398 nra = interaction_function[ftype].nratoms;
1400 nr = ilist[ftype].nr;
1401 ia = ilist[ftype].iatoms;
1405 pbc_null2 = pbc_null;
1407 else if (pbc_null != NULL)
1409 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
1414 if (vsite_pbc != NULL)
1416 if (vsite_pbc[i/(1+nra)] > -2)
1418 pbc_null2 = pbc_null;
1428 /* Constants for constructing */
1429 a1 = ip[tp].vsite.a;
1430 /* Construct the vsite depending on type */
1434 spread_vsite2(ia,a1,x,f,fshift,pbc_null2,g);
1437 b1 = ip[tp].vsite.b;
1438 spread_vsite3(ia,a1,b1,x,f,fshift,pbc_null2,g);
1441 b1 = ip[tp].vsite.b;
1442 spread_vsite3FD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1445 b1 = ip[tp].vsite.b;
1446 spread_vsite3FAD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1449 b1 = ip[tp].vsite.b;
1450 c1 = ip[tp].vsite.c;
1451 spread_vsite3OUT(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1454 b1 = ip[tp].vsite.b;
1455 c1 = ip[tp].vsite.c;
1456 spread_vsite4FD(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1459 b1 = ip[tp].vsite.b;
1460 c1 = ip[tp].vsite.c;
1461 spread_vsite4FDN(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1464 inc = spread_vsiten(ia,ip,x,f,fshift,pbc_null2,g);
1467 gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
1468 ftype,__FILE__,__LINE__);
1470 clear_rvec(f[ia[1]]);
1472 /* Increment loop variables */
1480 void spread_vsite_f(FILE *log,gmx_vsite_t *vsite,
1481 rvec x[],rvec f[],rvec *fshift,
1482 gmx_bool VirCorr,matrix vir,
1483 t_nrnb *nrnb,t_idef *idef,
1484 int ePBC,gmx_bool bMolPBC,t_graph *g,matrix box,
1487 t_pbc pbc,*pbc_null;
1490 /* We only need to do pbc when we have inter-cg vsites */
1491 if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite)
1493 /* This is wasting some CPU time as we now do this multiple times
1494 * per MD step. But how often do we have vsites with full pbc?
1496 pbc_null = set_pbc_dd(&pbc,ePBC,cr->dd,FALSE,box);
1503 if (DOMAINDECOMP(cr))
1505 dd_clear_f_vsites(cr->dd,f);
1507 else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
1509 pd_clear_nonlocal_constructs(vsite->vsitecomm,f);
1512 if (vsite->nthreads == 1)
1514 spread_vsite_f_thread(vsite,
1516 VirCorr,vsite->tdata[0].dxdf,
1517 idef->iparams,idef->il,
1522 /* First spread the vsites that might depend on other vsites */
1523 spread_vsite_f_thread(vsite,
1525 VirCorr,vsite->tdata[vsite->nthreads].dxdf,
1527 vsite->tdata[vsite->nthreads].ilist,
1530 #pragma omp parallel num_threads(vsite->nthreads)
1535 thread = gmx_omp_get_thread_num();
1537 if (thread == 0 || fshift == NULL)
1543 fshift_t = vsite->tdata[thread].fshift;
1546 spread_vsite_f_thread(vsite,
1548 VirCorr,vsite->tdata[thread].dxdf,
1550 vsite->tdata[thread].ilist,
1558 for(th=1; th<vsite->nthreads; th++)
1560 for(i=0; i<SHIFTS; i++)
1562 rvec_inc(fshift[i],vsite->tdata[th].fshift[i]);
1572 for(th=0; th<(vsite->nthreads==1 ? 1 : vsite->nthreads+1); th++)
1574 for(i=0; i<DIM; i++)
1576 for(j=0; j<DIM; j++)
1578 vir[i][j] += -0.5*vsite->tdata[th].dxdf[i][j];
1584 if (DOMAINDECOMP(cr))
1586 dd_move_f_vsites(cr->dd,f,fshift);
1588 else if (vsite->bPDvsitecomm)
1590 /* We only move forces here, and they are independent of shifts */
1591 move_construct_f(vsite->vsitecomm,f,cr);
1594 inc_nrnb(nrnb,eNR_VSITE2, vsite_count(idef->il,F_VSITE2));
1595 inc_nrnb(nrnb,eNR_VSITE3, vsite_count(idef->il,F_VSITE3));
1596 inc_nrnb(nrnb,eNR_VSITE3FD, vsite_count(idef->il,F_VSITE3FD));
1597 inc_nrnb(nrnb,eNR_VSITE3FAD,vsite_count(idef->il,F_VSITE3FAD));
1598 inc_nrnb(nrnb,eNR_VSITE3OUT,vsite_count(idef->il,F_VSITE3OUT));
1599 inc_nrnb(nrnb,eNR_VSITE4FD, vsite_count(idef->il,F_VSITE4FD));
1600 inc_nrnb(nrnb,eNR_VSITE4FDN,vsite_count(idef->il,F_VSITE4FDN));
1601 inc_nrnb(nrnb,eNR_VSITEN, vsite_count(idef->il,F_VSITEN));
1604 static int *atom2cg(t_block *cgs)
1608 snew(a2cg,cgs->index[cgs->nr]);
1609 for(cg=0; cg<cgs->nr; cg++) {
1610 for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++)
1617 static int count_intercg_vsite(gmx_mtop_t *mtop,
1618 gmx_bool *bHaveChargeGroups)
1620 int mb,mt,ftype,nral,i,cg,a;
1621 gmx_molblock_t *molb;
1622 gmx_moltype_t *molt;
1626 int n_intercg_vsite;
1628 *bHaveChargeGroups = FALSE;
1630 n_intercg_vsite = 0;
1631 for(mb=0; mb<mtop->nmolblock; mb++)
1633 molb = &mtop->molblock[mb];
1634 molt = &mtop->moltype[molb->type];
1636 if (molt->cgs.nr < molt->atoms.nr)
1638 *bHaveChargeGroups = TRUE;
1641 a2cg = atom2cg(&molt->cgs);
1642 for(ftype=0; ftype<F_NRE; ftype++)
1644 if (interaction_function[ftype].flags & IF_VSITE)
1647 il = &molt->ilist[ftype];
1649 for(i=0; i<il->nr; i+=1+nral)
1652 for(a=1; a<nral; a++)
1654 if (a2cg[ia[1+a]] != cg) {
1655 n_intercg_vsite += molb->nmol;
1665 return n_intercg_vsite;
/* Determine, per vsite interaction type, how each vsite should be treated
 * for periodic boundary conditions (PBC) when charge groups are in use.
 * Returns a newly allocated array of F_VSITEN-F_VSITE2+1 per-type arrays,
 * one entry per vsite, with the encoding (as set in the visible code below):
 *   -2 : vsite and all its constructing atoms are in the same charge group,
 *        no PBC treatment needed;
 *   -1 : use full PBC for this vsite;
 *  >=0 : follow the PBC image of the atom with this index.
 * Exactly one of 'atom' (global topology path) and 'md' (local mdatoms path)
 * is expected to be non-NULL; both are only used to identify vsite particles.
 * NOTE(review): this chunk of the file is elided — several interior lines
 * (declarations of il/ia/pbc_set, loop bookkeeping, closing braces) are not
 * visible here, so the comments below describe only the visible statements.
 */
1668 static int **get_vsite_pbc(t_iparams *iparams,t_ilist *ilist,
1669 t_atom *atom,t_mdatoms *md,
1670 t_block *cgs,int *a2cg)
1672 int ftype,nral,i,j,vsi,vsite,cg_v,cg_c,a,nc3=0;
1675 int **vsite_pbc,*vsite_pbc_f;
1677 gmx_bool bViteOnlyCG_and_FirstAtom;
1679 /* Make an array that tells if the pbc of an atom is set */
1680 snew(pbc_set,cgs->index[cgs->nr]);
1681 /* PBC is set for all non vsites */
1682 for(a=0; a<cgs->index[cgs->nr]; a++) {
1683 if ((atom && atom[a].ptype != eptVSite) ||
1684 (md && md->ptype[a] != eptVSite)) {
/* One per-vsite array for each vsite interaction type F_VSITE2..F_VSITEN */
1689 snew(vsite_pbc,F_VSITEN-F_VSITE2+1);
1691 for(ftype=0; ftype<F_NRE; ftype++) {
1692 if (interaction_function[ftype].flags & IF_VSITE) {
/* il->nr/(1+nral) is the number of vsites of this type:
 * each ilist entry is one parameter index plus nral atom indices.
 */
1697 snew(vsite_pbc[ftype-F_VSITE2],il->nr/(1+nral));
1698 vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
1701 while (i < il->nr) {
1705 /* A value of -2 signals that this vsite and its constructing
1706 * atoms are all within the same cg, so no pbc is required.
1708 vsite_pbc_f[vsi] = -2;
1709 /* Check if constructing atoms are outside the vsite's cg */
1711 if (ftype == F_VSITEN) {
/* F_VSITEN stores (param,atom) pairs in triplets; step by 3 */
1712 nc3 = 3*iparams[ia[i]].vsiten.n;
1713 for(j=0; j<nc3; j+=3) {
1714 if (a2cg[ia[i+j+2]] != cg_v)
1715 vsite_pbc_f[vsi] = -1;
1718 for(a=1; a<nral; a++) {
1719 if (a2cg[ia[i+1+a]] != cg_v)
1720 vsite_pbc_f[vsi] = -1;
1723 if (vsite_pbc_f[vsi] == -1) {
1724 /* Check if this is the first processed atom of a vsite only cg */
1725 bViteOnlyCG_and_FirstAtom = TRUE;
1726 for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1727 /* Non-vsites already have pbc set, so simply check for pbc_set */
1729 bViteOnlyCG_and_FirstAtom = FALSE;
1733 if (bViteOnlyCG_and_FirstAtom) {
1734 /* First processed atom of a vsite only charge group.
1735 * The pbc of the input coordinates to construct_vsites
1736 * should be preserved.
/* Encode "follow own pbc" by storing the vsite's own atom index */
1738 vsite_pbc_f[vsi] = vsite;
1739 } else if (cg_v != a2cg[ia[1+i+1]]) {
1740 /* This vsite has a different charge group index
1741 * than its first constructing atom
1742 * and the charge group has more than one atom,
1743 * search for the first normal particle
1744 * or vsite that already had its pbc defined.
1745 * If nothing is found, use full pbc for this vsite.
1747 for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1748 if (a != vsite && pbc_set[a]) {
1749 vsite_pbc_f[vsi] = a;
1751 fprintf(debug,"vsite %d match pbc with atom %d\n",
1757 fprintf(debug,"vsite atom %d cg %d - %d pbc atom %d\n",
1758 vsite+1,cgs->index[cg_v]+1,cgs->index[cg_v+1],
1759 vsite_pbc_f[vsi]+1);
1762 if (ftype == F_VSITEN) {
1763 /* The other entries in vsite_pbc_f are not used for center vsites */
1769 /* This vsite now has its pbc defined */
/* Allocate and initialize the gmx_vsite_t bookkeeping structure.
 * Counts the vsites in the global topology, records whether there are
 * inter-charge-group vsites, pre-computes per-moltype PBC info (only when
 * charge groups are present, there are inter-cg vsites, and domain
 * decomposition is active), and sets up per-thread work structures.
 * bSerial_NoPBC disables the charge-group PBC pre-computation (used for
 * serial runs that do not apply PBC).
 * NOTE(review): this chunk of the file is elided — allocation of 'vsite'
 * itself, some declarations, and several closing braces are not visible
 * here; comments describe only the visible statements.
 */
1781 gmx_vsite_t *init_vsite(gmx_mtop_t *mtop,t_commrec *cr,
1782 gmx_bool bSerial_NoPBC)
1788 gmx_moltype_t *molt;
1791 /* check if there are vsites */
1793 for(i=0; i<F_NRE; i++)
1795 if (interaction_function[i].flags & IF_VSITE)
1797 nvsite += gmx_mtop_ftype_count(mtop,i);
1808 vsite->n_intercg_vsite = count_intercg_vsite(mtop,
1809 &vsite->bHaveChargeGroups);
1811 /* If we don't have charge groups, the vsite follows its own pbc */
1812 if (!bSerial_NoPBC &&
1813 vsite->bHaveChargeGroups &&
1814 vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr))
1816 vsite->nvsite_pbc_molt = mtop->nmoltype;
1817 snew(vsite->vsite_pbc_molt,vsite->nvsite_pbc_molt);
1818 for(mt=0; mt<mtop->nmoltype; mt++)
1820 molt = &mtop->moltype[mt];
1821 /* Make an atom to charge group index */
1822 a2cg = atom2cg(&molt->cgs);
/* Global-topology path: pass t_atom array, no mdatoms (NULL) */
1823 vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
1825 molt->atoms.atom,NULL,
/* Local (per-domain) PBC arrays, one slot per vsite ftype */
1830 snew(vsite->vsite_pbc_loc_nalloc,F_VSITEN-F_VSITE2+1);
1831 snew(vsite->vsite_pbc_loc ,F_VSITEN-F_VSITE2+1);
1836 vsite->nthreads = 1;
1840 vsite->nthreads = gmx_omp_nthreads_get(emntVSITE);
1844 /* We need one extra thread data structure for the overlap vsites */
1845 snew(vsite->tdata,vsite->nthreads+1);
/* Thread-index array is allocated lazily in split_vsites_over_threads() */
1848 vsite->th_ind = NULL;
1849 vsite->th_ind_nalloc = 0;
/* Reset one thread's vsite ilists for a new distribution pass:
 * grow each vsite-type iatoms buffer to hold at least as many entries as
 * the full ilist (worst case: all vsites land on this thread), then set
 * the used count to zero so entries can be appended.
 * NOTE(review): some interior lines (loop body braces, ftype declaration)
 * are elided from this view.
 */
1854 static void prepare_vsite_thread(const t_ilist *ilist,
1855 gmx_vsite_thread_t *vsite_th)
1859 for(ftype=0; ftype<F_NRE; ftype++)
1861 if (interaction_function[ftype].flags & IF_VSITE)
1863 if (ilist[ftype].nr > vsite_th->ilist[ftype].nalloc)
1865 vsite_th->ilist[ftype].nalloc = over_alloc_large(ilist[ftype].nr);
1866 srenew(vsite_th->ilist[ftype].iatoms,vsite_th->ilist[ftype].nalloc);
1869 vsite_th->ilist[ftype].nr = 0;
/* Distribute the vsite interactions in 'ilist' over the vsite threads.
 * Atoms are partitioned into uniform index ranges, one per thread; a vsite
 * whose constructing atoms all fall in its own thread's range is assigned
 * to that thread, otherwise it goes into the extra "overlap" batch
 * (index vsite->nthreads) that the master thread handles separately.
 * bLimitRange: when TRUE (no domain decomposition), restrict the divided
 * atom range to the highest atom index actually involved in a vsite.
 * NOTE(review): this chunk of the file is elided — some declarations
 * (th, i, j, nral1, iat, il_th, th_ind, inc), the early return for a
 * single thread, and various braces are not visible here.
 */
1874 void split_vsites_over_threads(const t_ilist *ilist,
1875 const t_mdatoms *mdatoms,
1876 gmx_bool bLimitRange,
1880 int vsite_atom_range,natperthread;
1887 if (vsite->nthreads == 1)
1893 #pragma omp parallel for num_threads(vsite->nthreads) schedule(static)
1894 for(th=0; th<vsite->nthreads; th++)
1896 prepare_vsite_thread(ilist,&vsite->tdata[th]);
1898 /* The master thread does the (potential) overlap vsites */
1899 prepare_vsite_thread(ilist,&vsite->tdata[vsite->nthreads]);
1901 /* The current way of distributing the vsites over threads is primitive.
1902 * We divide the atom range 0 - natoms_in_vsite uniformly over threads,
1903 * without taking into account how the vsites are distributed.
1904 * Without domain decomposition we have bLimitRange=TRUE and we at least
1905 * tighten the upper bound of the range (useful for common systems
1906 * such as a vsite-protein in 3-site water).
/* Find the highest atom index referenced by any vsite interaction */
1910 vsite_atom_range = -1;
1911 for(ftype=0; ftype<F_NRE; ftype++)
1913 if ((interaction_function[ftype].flags & IF_VSITE) &&
1916 nral1 = 1 + NRAL(ftype);
1917 iat = ilist[ftype].iatoms;
1918 for(i=0; i<ilist[ftype].nr; i+=nral1)
1920 for(j=i+1; j<i+nral1; j++)
1922 vsite_atom_range = max(vsite_atom_range,iat[j]);
1931 vsite_atom_range = mdatoms->homenr;
/* Ceiling division: every thread covers at most natperthread atoms */
1933 natperthread = (vsite_atom_range + vsite->nthreads - 1)/vsite->nthreads;
1937 fprintf(debug,"virtual site thread dist: natoms %d, range %d, natperthread %d\n",mdatoms->nr,vsite_atom_range,natperthread);
1940 /* To simplify the vsite assignment, we make an index which tells us
1941 * to which thread particles, both non-vsites and vsites, are assigned.
1943 if (mdatoms->nr > vsite->th_ind_nalloc)
1945 vsite->th_ind_nalloc = over_alloc_large(mdatoms->nr);
1946 srenew(vsite->th_ind,vsite->th_ind_nalloc);
1948 th_ind = vsite->th_ind;
1950 for(i=0; i<mdatoms->nr; i++)
1952 if (mdatoms->ptype[i] == eptVSite)
1954 /* vsites are not assigned to a thread yet */
1959 /* assign non-vsite particles to thread th */
1962 if (i == (th + 1)*natperthread && th < vsite->nthreads)
1968 for(ftype=0; ftype<F_NRE; ftype++)
1970 if ((interaction_function[ftype].flags & IF_VSITE) &&
1973 nral1 = 1 + NRAL(ftype);
1975 iat = ilist[ftype].iatoms;
1976 for(i=0; i<ilist[ftype].nr; )
/* Provisional thread: determined by the vsite atom's index range */
1978 th = iat[1+i]/natperthread;
1979 /* We would like to assign this vsite the thread th,
1980 * but it might depend on atoms outside the atom range of th
1981 * or on another vsite not assigned to thread th.
1983 if (ftype != F_VSITEN)
1985 for(j=i+2; j<i+nral1; j++)
1987 if (th_ind[iat[j]] != th)
1989 /* Some constructing atoms are not assigned to
1990 * thread th, move this vsite to a separate batch.
1992 th = vsite->nthreads;
/* F_VSITEN case: constructing atoms sit at every third entry */
1999 for(j=i+2; j<i+inc; j+=3)
2001 if (th_ind[iat[j]] != th)
2003 th = vsite->nthreads;
2007 /* Copy this vsite to the thread data struct of thread th */
2008 il_th = &vsite->tdata[th].ilist[ftype];
2009 for(j=i; j<i+inc; j++)
2011 il_th->iatoms[il_th->nr++] = iat[j];
2013 /* Update this vsite's thread index entry */
2014 th_ind[iat[1+i]] = th;
/* Debug output: per-ftype count of vsites assigned to each thread
 * (the last column is the overlap batch).
 */
2023 for(ftype=0; ftype<F_NRE; ftype++)
2025 if ((interaction_function[ftype].flags & IF_VSITE) &&
2026 ilist[ftype].nr > 0)
2028 fprintf(debug,"%-20s thread dist:",
2029 interaction_function[ftype].longname);
2030 for(th=0; th<vsite->nthreads+1; th++)
2032 fprintf(debug," %4d",vsite->tdata[th].ilist[ftype].nr);
2034 fprintf(debug,"\n");
/* Configure 'vsite' for a (new) local topology: recompute local vsite PBC
 * info when charge groups are present, otherwise set up particle-
 * decomposition vsite communication; with multiple threads, redistribute
 * the vsites over the threads.
 * NOTE(review): this chunk of the file is elided and the function body
 * continues beyond the visible lines; comments describe only what is shown.
 */
2040 void set_vsite_top(gmx_vsite_t *vsite,gmx_localtop_t *top,t_mdatoms *md,
2045 if (vsite->n_intercg_vsite > 0)
2047 if (vsite->bHaveChargeGroups)
2049 /* Make an atom to charge group index */
2050 a2cg = atom2cg(&top->cgs);
/* Local-topology path: pass mdatoms (md), no t_atom array (NULL) */
2051 vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
2052 top->idef.il,NULL,md,
/* Particle decomposition: set up inter-node vsite communication */
2059 snew(vsite->vsitecomm,1);
2060 vsite->bPDvsitecomm =
2061 setup_parallel_vsites(&(top->idef),cr,vsite->vsitecomm);
2065 if (vsite->nthreads > 1)
2067 if (vsite->bHaveChargeGroups || PARTDECOMP(cr))
2069 gmx_incons("Can not use threads virtual sites combined with charge groups or particle decomposition");
2072 split_vsites_over_threads(top->idef.il,md,!DOMAINDECOMP(cr),vsite);