1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
53 #include "mtop_util.h"
55 /* Routines to send/recieve coordinates and force
56 * of constructing atoms.
59 static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
65 sendbuf = vsitecomm->send_buf;
66 recvbuf = vsitecomm->recv_buf;
69 /* Prepare pulse left by copying to send buffer */
70 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
72 ia = vsitecomm->left_export_construct[i];
73 copy_rvec(x[ia],sendbuf[i]);
76 /* Pulse coordinates left */
77 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_export_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_import_nconstruct);
79 /* Copy from receive buffer to coordinate array */
80 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
82 ia = vsitecomm->right_import_construct[i];
83 copy_rvec(recvbuf[i],x[ia]);
86 /* Prepare pulse right by copying to send buffer */
87 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
89 ia = vsitecomm->right_export_construct[i];
90 copy_rvec(x[ia],sendbuf[i]);
93 /* Pulse coordinates right */
94 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_export_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_import_nconstruct);
96 /* Copy from receive buffer to coordinate array */
97 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
99 ia = vsitecomm->left_import_construct[i];
100 copy_rvec(recvbuf[i],x[ia]);
105 static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
111 sendbuf = vsitecomm->send_buf;
112 recvbuf = vsitecomm->recv_buf;
114 /* Prepare pulse right by copying to send buffer */
115 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
117 ia = vsitecomm->right_import_construct[i];
118 copy_rvec(f[ia],sendbuf[i]);
119 clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
122 /* Pulse forces right */
123 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_import_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_export_nconstruct);
125 /* Copy from receive buffer to coordinate array */
126 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
128 ia = vsitecomm->left_export_construct[i];
129 rvec_inc(f[ia],recvbuf[i]);
132 /* Prepare pulse left by copying to send buffer */
133 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
135 ia = vsitecomm->left_import_construct[i];
136 copy_rvec(f[ia],sendbuf[i]);
137 clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
140 /* Pulse coordinates left */
141 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_import_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_export_nconstruct);
143 /* Copy from receive buffer to coordinate array */
144 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
146 ia = vsitecomm->right_export_construct[i];
147 rvec_inc(f[ia],recvbuf[i]);
150 /* All forces are now on the home processors */
155 pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
159 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
161 ia = vsitecomm->left_import_construct[i];
164 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
166 ia = vsitecomm->right_import_construct[i];
173 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
176 return pbc_dx_aiuc(pbc,xi,xj,dx);
184 /* Vsite construction routines */
186 static void constr_vsite2(rvec xi,rvec xj,rvec x,real a,t_pbc *pbc)
195 pbc_dx_aiuc(pbc,xj,xi,dx);
196 x[XX] = xi[XX] + a*dx[XX];
197 x[YY] = xi[YY] + a*dx[YY];
198 x[ZZ] = xi[ZZ] + a*dx[ZZ];
200 x[XX] = b*xi[XX] + a*xj[XX];
201 x[YY] = b*xi[YY] + a*xj[YY];
202 x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
206 /* TOTAL: 10 flops */
209 static void constr_vsite3(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
219 pbc_dx_aiuc(pbc,xj,xi,dxj);
220 pbc_dx_aiuc(pbc,xk,xi,dxk);
221 x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
222 x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
223 x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
225 x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
226 x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
227 x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
231 /* TOTAL: 17 flops */
234 static void constr_vsite3FD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
240 pbc_rvec_sub(pbc,xj,xi,xij);
241 pbc_rvec_sub(pbc,xk,xj,xjk);
244 /* temp goes from i to a point on the line jk */
245 temp[XX] = xij[XX] + a*xjk[XX];
246 temp[YY] = xij[YY] + a*xjk[YY];
247 temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
250 c=b*gmx_invsqrt(iprod(temp,temp));
253 x[XX] = xi[XX] + c*temp[XX];
254 x[YY] = xi[YY] + c*temp[YY];
255 x[ZZ] = xi[ZZ] + c*temp[ZZ];
258 /* TOTAL: 34 flops */
261 static void constr_vsite3FAD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b, t_pbc *pbc)
264 real a1,b1,c1,invdij;
266 pbc_rvec_sub(pbc,xj,xi,xij);
267 pbc_rvec_sub(pbc,xk,xj,xjk);
270 invdij = gmx_invsqrt(iprod(xij,xij));
271 c1 = invdij * invdij * iprod(xij,xjk);
272 xp[XX] = xjk[XX] - c1*xij[XX];
273 xp[YY] = xjk[YY] - c1*xij[YY];
274 xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
276 b1 = b*gmx_invsqrt(iprod(xp,xp));
279 x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
280 x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
281 x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
284 /* TOTAL: 63 flops */
287 static void constr_vsite3OUT(rvec xi,rvec xj,rvec xk,rvec x,
288 real a,real b,real c,t_pbc *pbc)
292 pbc_rvec_sub(pbc,xj,xi,xij);
293 pbc_rvec_sub(pbc,xk,xi,xik);
297 x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
298 x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
299 x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
302 /* TOTAL: 33 flops */
305 static void constr_vsite4FD(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
306 real a,real b,real c,t_pbc *pbc)
308 rvec xij,xjk,xjl,temp;
311 pbc_rvec_sub(pbc,xj,xi,xij);
312 pbc_rvec_sub(pbc,xk,xj,xjk);
313 pbc_rvec_sub(pbc,xl,xj,xjl);
316 /* temp goes from i to a point on the plane jkl */
317 temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
318 temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
319 temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
322 d=c*gmx_invsqrt(iprod(temp,temp));
325 x[XX] = xi[XX] + d*temp[XX];
326 x[YY] = xi[YY] + d*temp[YY];
327 x[ZZ] = xi[ZZ] + d*temp[ZZ];
330 /* TOTAL: 43 flops */
334 static void constr_vsite4FDN(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
335 real a,real b,real c,t_pbc *pbc)
337 rvec xij,xik,xil,ra,rb,rja,rjb,rm;
340 pbc_rvec_sub(pbc,xj,xi,xij);
341 pbc_rvec_sub(pbc,xk,xi,xik);
342 pbc_rvec_sub(pbc,xl,xi,xil);
355 rvec_sub(ra,xij,rja);
356 rvec_sub(rb,xij,rjb);
362 d=c*gmx_invsqrt(norm2(rm));
365 x[XX] = xi[XX] + d*rm[XX];
366 x[YY] = xi[YY] + d*rm[YY];
367 x[ZZ] = xi[ZZ] + d*rm[ZZ];
370 /* TOTAL: 47 flops */
374 static int constr_vsiten(t_iatom *ia, t_iparams ip[],
382 n3 = 3*ip[ia[0]].vsiten.n;
387 for(i=3; i<n3; i+=3) {
389 a = ip[ia[i]].vsiten.a;
391 pbc_dx_aiuc(pbc,x[ai],x1,dx);
393 rvec_sub(x[ai],x1,dx);
395 dsum[XX] += a*dx[XX];
396 dsum[YY] += a*dx[YY];
397 dsum[ZZ] += a*dx[ZZ];
401 x[av][XX] = x1[XX] + dsum[XX];
402 x[av][YY] = x1[YY] + dsum[YY];
403 x[av][ZZ] = x1[ZZ] + dsum[ZZ];
409 void construct_vsites(FILE *log,gmx_vsite_t *vsite,
410 rvec x[],t_nrnb *nrnb,
412 t_iparams ip[],t_ilist ilist[],
413 int ePBC,gmx_bool bMolPBC,t_graph *graph,
414 t_commrec *cr,matrix box)
417 real a1,b1,c1,inv_dt;
418 int i,inc,ii,nra,nr,tp,ftype;
419 t_iatom avsite,ai,aj,ak,al,pbc_atom;
421 t_pbc pbc,*pbc_null,*pbc_null2;
423 int *vsite_pbc,ishift;
424 rvec reftmp,vtmp,rtmp;
426 bDomDec = cr && DOMAINDECOMP(cr);
428 /* We only need to do pbc when we have inter-cg vsites */
429 if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite) {
430 /* This is wasting some CPU time as we now do this multiple times
431 * per MD step. But how often do we have vsites with full pbc?
433 pbc_null = set_pbc_dd(&pbc,ePBC,cr!=NULL ? cr->dd : NULL,FALSE,box);
440 dd_move_x_vsites(cr->dd,box,x);
441 } else if (vsite->bPDvsitecomm) {
442 /* I'm not sure whether the periodicity and shift are guaranteed
443 * to be consistent between different nodes when running e.g. polymers
444 * in parallel. In this special case we thus unshift/shift,
445 * but only when necessary. This is to make sure the coordinates
446 * we move don't end up a box away...
449 unshift_self(graph,box,x);
451 move_construct_x(vsite->vsitecomm,x,cr);
454 shift_self(graph,box,x);
465 for(ftype=0; (ftype<F_NRE); ftype++) {
466 if (interaction_function[ftype].flags & IF_VSITE) {
467 nra = interaction_function[ftype].nratoms;
468 nr = ilist[ftype].nr;
469 ia = ilist[ftype].iatoms;
472 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
480 if (ftype != idef->functype[tp])
481 gmx_incons("Function types for vsites wrong");
484 /* The vsite and constructing atoms */
488 /* Constants for constructing vsites */
490 /* Check what kind of pbc we need to use */
492 pbc_atom = vsite_pbc[i/(1+nra)];
495 /* We need to copy the coordinates here,
496 * single for single atom cg's pbc_atom is the vsite itself.
498 copy_rvec(x[pbc_atom],xpbc);
500 pbc_null2 = pbc_null;
507 /* Copy the old position */
508 copy_rvec(x[avsite],xv);
510 /* Construct the vsite depending on type */
515 constr_vsite2(x[ai],x[aj],x[avsite],a1,pbc_null2);
521 constr_vsite3(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
527 constr_vsite3FD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
533 constr_vsite3FAD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
540 constr_vsite3OUT(x[ai],x[aj],x[ak],x[avsite],a1,b1,c1,pbc_null2);
548 constr_vsite4FD(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
557 constr_vsite4FDN(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
561 inc = constr_vsiten(ia,ip,x,pbc_null2);
564 gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
565 ftype,__FILE__,__LINE__);
569 /* Match the pbc of this vsite to the rest of its charge group */
570 ishift = pbc_dx_aiuc(pbc_null,x[avsite],xpbc,dx);
571 if (ishift != CENTRAL)
572 rvec_add(xpbc,dx,x[avsite]);
575 /* Calculate velocity of vsite... */
576 rvec_sub(x[avsite],xv,vv);
577 svmul(inv_dt,vv,v[avsite]);
580 /* Increment loop variables */
588 static void spread_vsite2(t_iatom ia[],real a,
589 rvec x[],rvec f[],rvec fshift[],
590 t_pbc *pbc,t_graph *g)
611 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
613 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
616 siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
617 sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
623 if (fshift && (siv != CENTRAL || sij != CENTRAL)) {
624 rvec_inc(fshift[siv],f[av]);
625 rvec_dec(fshift[CENTRAL],fi);
626 rvec_dec(fshift[sij],fj);
629 /* TOTAL: 13 flops */
632 void construct_vsites_mtop(FILE *log,gmx_vsite_t *vsite,
633 gmx_mtop_t *mtop,rvec x[])
636 gmx_molblock_t *molb;
640 for(mb=0; mb<mtop->nmolblock; mb++) {
641 molb = &mtop->molblock[mb];
642 molt = &mtop->moltype[molb->type];
643 for(mol=0; mol<molb->nmol; mol++) {
644 construct_vsites(log,vsite,x+as,NULL,0.0,NULL,
645 mtop->ffparams.iparams,molt->ilist,
646 epbcNONE,TRUE,NULL,NULL,NULL);
647 as += molt->atoms.nr;
652 static void spread_vsite3(t_iatom ia[],real a,real b,
653 rvec x[],rvec f[],rvec fshift[],
654 t_pbc *pbc,t_graph *g)
666 svmul(1-a-b,f[av],fi);
677 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ia[1]),di);
679 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
681 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),di);
684 siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
685 sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
686 sik = pbc_dx_aiuc(pbc,x[ai],x[ak],dx);
693 if (fshift && (siv!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL)) {
694 rvec_inc(fshift[siv],f[av]);
695 rvec_dec(fshift[CENTRAL],fi);
696 rvec_dec(fshift[sij],fj);
697 rvec_dec(fshift[sik],fk);
700 /* TOTAL: 20 flops */
703 static void spread_vsite3FD(t_iatom ia[],real a,real b,
704 rvec x[],rvec f[],rvec fshift[],
705 gmx_bool VirCorr,matrix dxdf,
706 t_pbc *pbc,t_graph *g)
708 real fx,fy,fz,c,invl,fproj,a1;
709 rvec xvi,xij,xjk,xix,fv,temp;
720 sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
721 skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
724 /* xix goes from i to point x on the line jk */
725 xix[XX]=xij[XX]+a*xjk[XX];
726 xix[YY]=xij[YY]+a*xjk[YY];
727 xix[ZZ]=xij[ZZ]+a*xjk[ZZ];
730 invl=gmx_invsqrt(iprod(xix,xix));
734 fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
736 temp[XX]=c*(fv[XX]-fproj*xix[XX]);
737 temp[YY]=c*(fv[YY]-fproj*xix[YY]);
738 temp[ZZ]=c*(fv[ZZ]-fproj*xix[ZZ]);
741 /* c is already calculated in constr_vsite3FD
742 storing c somewhere will save 26 flops! */
745 f[ai][XX] += fv[XX] - temp[XX];
746 f[ai][YY] += fv[YY] - temp[YY];
747 f[ai][ZZ] += fv[ZZ] - temp[ZZ];
748 f[aj][XX] += a1*temp[XX];
749 f[aj][YY] += a1*temp[YY];
750 f[aj][ZZ] += a1*temp[ZZ];
751 f[ak][XX] += a*temp[XX];
752 f[ak][YY] += a*temp[YY];
753 f[ak][ZZ] += a*temp[ZZ];
757 ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
759 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
761 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
764 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
769 if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
770 rvec_dec(fshift[svi],fv);
771 fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
772 fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
773 fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
774 fshift[ sji][XX] += temp[XX];
775 fshift[ sji][YY] += temp[YY];
776 fshift[ sji][ZZ] += temp[ZZ];
777 fshift[ skj][XX] += a*temp[XX];
778 fshift[ skj][YY] += a*temp[YY];
779 fshift[ skj][ZZ] += a*temp[ZZ];
784 /* When VirCorr=TRUE, the virial for the current forces is not
785 * calculated from the redistributed forces. This means that
786 * the effect of non-linear virtual site constructions on the virial
787 * needs to be added separately. This contribution can be calculated
788 * in many ways, but the simplest and cheapest way is to use
789 * the first constructing atom ai as a reference position in space:
790 * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
795 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
801 /* As xix is a linear combination of j and k, use that here */
802 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
807 /* TOTAL: 61 flops */
810 static void spread_vsite3FAD(t_iatom ia[],real a,real b,
811 rvec x[],rvec f[],rvec fshift[],
812 gmx_bool VirCorr,matrix dxdf,
813 t_pbc *pbc,t_graph *g)
815 rvec xvi,xij,xjk,xperp,Fpij,Fppp,fv,f1,f2,f3;
816 real a1,b1,c1,c2,invdij,invdij2,invdp,fproj;
825 copy_rvec(f[ia[1]],fv);
827 sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
828 skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
831 invdij = gmx_invsqrt(iprod(xij,xij));
832 invdij2 = invdij * invdij;
833 c1 = iprod(xij,xjk) * invdij2;
834 xperp[XX] = xjk[XX] - c1*xij[XX];
835 xperp[YY] = xjk[YY] - c1*xij[YY];
836 xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
837 /* xperp in plane ijk, perp. to ij */
838 invdp = gmx_invsqrt(iprod(xperp,xperp));
843 /* a1, b1 and c1 are already calculated in constr_vsite3FAD
844 storing them somewhere will save 45 flops! */
846 fproj=iprod(xij ,fv)*invdij2;
847 svmul(fproj, xij, Fpij); /* proj. f on xij */
848 svmul(iprod(xperp,fv)*invdp*invdp,xperp,Fppp); /* proj. f on xperp */
849 svmul(b1*fproj, xperp,f3);
852 rvec_sub(fv,Fpij,f1); /* f1 = f - Fpij */
853 rvec_sub(f1,Fppp,f2); /* f2 = f - Fpij - Fppp */
854 for (d=0; (d<DIM); d++) {
861 f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
862 f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
863 f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
864 f[aj][XX] += f1[XX] - c2*f2[XX] - f3[XX];
865 f[aj][YY] += f1[YY] - c2*f2[YY] - f3[YY];
866 f[aj][ZZ] += f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
873 ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
875 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
877 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
880 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
885 if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
886 rvec_dec(fshift[svi],fv);
887 fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
888 fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
889 fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
890 fshift[ sji][XX] += f1[XX] - c1 *f2[XX] - f3[XX];
891 fshift[ sji][YY] += f1[YY] - c1 *f2[YY] - f3[YY];
892 fshift[ sji][ZZ] += f1[ZZ] - c1 *f2[ZZ] - f3[ZZ];
893 fshift[ skj][XX] += f2[XX];
894 fshift[ skj][YY] += f2[YY];
895 fshift[ skj][ZZ] += f2[ZZ];
903 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
909 /* Note that xik=xij+xjk, so we have to add xij*f2 */
912 + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
918 /* TOTAL: 113 flops */
921 static void spread_vsite3OUT(t_iatom ia[],real a,real b,real c,
922 rvec x[],rvec f[],rvec fshift[],
923 gmx_bool VirCorr,matrix dxdf,
924 t_pbc *pbc,t_graph *g)
926 rvec xvi,xij,xik,fv,fj,fk;
937 sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
938 ski = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
948 fj[XX] = a*fv[XX] - xik[ZZ]*cfy + xik[YY]*cfz;
949 fj[YY] = xik[ZZ]*cfx + a*fv[YY] - xik[XX]*cfz;
950 fj[ZZ] = -xik[YY]*cfx + xik[XX]*cfy + a*fv[ZZ];
952 fk[XX] = b*fv[XX] + xij[ZZ]*cfy - xij[YY]*cfz;
953 fk[YY] = -xij[ZZ]*cfx + b*fv[YY] + xij[XX]*cfz;
954 fk[ZZ] = xij[YY]*cfx - xij[XX]*cfy + b*fv[ZZ];
957 f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
958 f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
959 f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
965 ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
967 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
969 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
972 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
977 if (fshift && (svi!=CENTRAL || sji!=CENTRAL || ski!=CENTRAL)) {
978 rvec_dec(fshift[svi],fv);
979 fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
980 fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
981 fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
982 rvec_inc(fshift[sji],fj);
983 rvec_inc(fshift[ski],fk);
991 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
997 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
1002 /* TOTAL: 54 flops */
1005 static void spread_vsite4FD(t_iatom ia[],real a,real b,real c,
1006 rvec x[],rvec f[],rvec fshift[],
1007 gmx_bool VirCorr,matrix dxdf,
1008 t_pbc *pbc,t_graph *g)
1010 real d,invl,fproj,a1;
1011 rvec xvi,xij,xjk,xjl,xix,fv,temp;
1012 atom_id av,ai,aj,ak,al;
1014 int svi,sji,skj,slj,m;
1022 sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1023 skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
1024 slj = pbc_rvec_sub(pbc,x[al],x[aj],xjl);
1027 /* xix goes from i to point x on the plane jkl */
1028 for(m=0; m<DIM; m++)
1029 xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
1032 invl=gmx_invsqrt(iprod(xix,xix));
1034 /* 4 + ?10? flops */
1036 copy_rvec(f[av],fv);
1038 fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
1040 for(m=0; m<DIM; m++)
1041 temp[m] = d*(fv[m] - fproj*xix[m]);
1044 /* c is already calculated in constr_vsite3FD
1045 storing c somewhere will save 35 flops! */
1048 for(m=0; m<DIM; m++) {
1049 f[ai][m] += fv[m] - temp[m];
1050 f[aj][m] += a1*temp[m];
1051 f[ak][m] += a*temp[m];
1052 f[al][m] += b*temp[m];
1057 ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
1059 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1061 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
1063 ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,aj),di);
1066 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1072 (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL || slj!=CENTRAL)) {
1073 rvec_dec(fshift[svi],fv);
1074 for(m=0; m<DIM; m++) {
1075 fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
1076 fshift[ sji][m] += temp[m];
1077 fshift[ skj][m] += a*temp[m];
1078 fshift[ slj][m] += b*temp[m];
1087 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1089 for(i=0; i<DIM; i++)
1091 for(j=0; j<DIM; j++)
1093 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
1098 /* TOTAL: 77 flops */
1102 static void spread_vsite4FDN(t_iatom ia[],real a,real b,real c,
1103 rvec x[],rvec f[],rvec fshift[],
1104 gmx_bool VirCorr,matrix dxdf,
1105 t_pbc *pbc,t_graph *g)
1107 rvec xvi,xij,xik,xil,ra,rb,rja,rjb,rab,rm,rt;
1113 int svi,sij,sik,sil;
1115 /* DEBUG: check atom indices */
1122 copy_rvec(f[av],fv);
1124 sij = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1125 sik = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1126 sil = pbc_rvec_sub(pbc,x[al],x[ai],xil);
1139 rvec_sub(ra,xij,rja);
1140 rvec_sub(rb,xij,rjb);
1141 rvec_sub(rb,ra,rab);
1147 invrm=gmx_invsqrt(norm2(rm));
1151 cfx = c*invrm*fv[XX];
1152 cfy = c*invrm*fv[YY];
1153 cfz = c*invrm*fv[ZZ];
1164 fj[XX] = ( -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
1165 fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
1166 fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1177 fk[XX] = ( -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
1178 fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
1179 fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1190 fl[XX] = ( -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
1191 fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + ( -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
1192 fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + ( -rm[ZZ]*rt[ZZ]) * cfz;
1195 f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1196 f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1197 f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1204 ivec_sub(SHIFT_IVEC(g,av),SHIFT_IVEC(g,ai),di);
1206 ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1208 ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1210 ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,ai),di);
1213 svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1218 if (fshift && (svi!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL || sil!=CENTRAL)) {
1219 rvec_dec(fshift[svi],fv);
1220 fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1221 fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1222 fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1223 rvec_inc(fshift[sij],fj);
1224 rvec_inc(fshift[sik],fk);
1225 rvec_inc(fshift[sil],fl);
1233 pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1235 for(i=0; i<DIM; i++)
1237 for(j=0; j<DIM; j++)
1239 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
1244 /* Total: 207 flops (Yuck!) */
1248 static int spread_vsiten(t_iatom ia[],t_iparams ip[],
1249 rvec x[],rvec f[],rvec fshift[],
1250 t_pbc *pbc,t_graph *g)
1258 n3 = 3*ip[ia[0]].vsiten.n;
1260 copy_rvec(x[av],xv);
1262 for(i=0; i<n3; i+=3) {
1265 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
1268 siv = pbc_dx_aiuc(pbc,x[ai],xv,dx);
1272 a = ip[ia[i]].vsiten.a;
1275 if (fshift && siv != CENTRAL) {
1276 rvec_inc(fshift[siv],fi);
1277 rvec_dec(fshift[CENTRAL],fi);
1286 void spread_vsite_f(FILE *log,gmx_vsite_t *vsite,
1287 rvec x[],rvec f[],rvec *fshift,
1288 gmx_bool VirCorr,matrix vir,
1289 t_nrnb *nrnb,t_idef *idef,
1290 int ePBC,gmx_bool bMolPBC,t_graph *g,matrix box,
1294 int i,inc,m,nra,nr,tp,ftype;
1295 int nd2,nd3,nd3FD,nd3FAD,nd3OUT,nd4FD,nd4FDN,ndN;
1298 t_pbc pbc,*pbc_null,*pbc_null2;
1302 /* We only need to do pbc when we have inter-cg vsites */
1303 if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite) {
1304 /* This is wasting some CPU time as we now do this multiple times
1305 * per MD step. But how often do we have vsites with full pbc?
1307 pbc_null = set_pbc_dd(&pbc,ePBC,cr->dd,FALSE,box);
1312 if (DOMAINDECOMP(cr))
1314 dd_clear_f_vsites(cr->dd,f);
1316 else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
1318 pd_clear_nonlocal_constructs(vsite->vsitecomm,f);
1337 /* this loop goes backwards to be able to build *
1338 * higher type vsites from lower types */
1340 for(ftype=F_NRE-1; (ftype>=0); ftype--) {
1341 if (interaction_function[ftype].flags & IF_VSITE) {
1342 nra = interaction_function[ftype].nratoms;
1343 nr = idef->il[ftype].nr;
1344 ia = idef->il[ftype].iatoms;
1347 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
1352 for(i=0; (i<nr); ) {
1353 /* Check if we need to apply pbc for this vsite */
1355 if (vsite_pbc[i/(1+nra)] > -2)
1356 pbc_null2 = pbc_null;
1362 if (ftype != idef->functype[tp])
1363 gmx_incons("Functiontypes for vsites wrong");
1365 /* Constants for constructing */
1366 a1 = ip[tp].vsite.a;
1367 /* Construct the vsite depending on type */
1371 spread_vsite2(ia,a1,x,f,fshift,pbc_null2,g);
1375 b1 = ip[tp].vsite.b;
1376 spread_vsite3(ia,a1,b1,x,f,fshift,pbc_null2,g);
1380 b1 = ip[tp].vsite.b;
1381 spread_vsite3FD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1385 b1 = ip[tp].vsite.b;
1386 spread_vsite3FAD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1390 b1 = ip[tp].vsite.b;
1391 c1 = ip[tp].vsite.c;
1392 spread_vsite3OUT(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1396 b1 = ip[tp].vsite.b;
1397 c1 = ip[tp].vsite.c;
1398 spread_vsite4FD(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1402 b1 = ip[tp].vsite.b;
1403 c1 = ip[tp].vsite.c;
1404 spread_vsite4FDN(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1408 inc = spread_vsiten(ia,ip,x,f,fshift,pbc_null2,g);
1412 gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
1413 ftype,__FILE__,__LINE__);
1415 clear_rvec(f[ia[1]]);
1417 /* Increment loop variables */
1428 for(i=0; i<DIM; i++)
1430 for(j=0; j<DIM; j++)
1432 vir[i][j] += -0.5*dxdf[i][j];
1437 inc_nrnb(nrnb,eNR_VSITE2, nd2 );
1438 inc_nrnb(nrnb,eNR_VSITE3, nd3 );
1439 inc_nrnb(nrnb,eNR_VSITE3FD, nd3FD );
1440 inc_nrnb(nrnb,eNR_VSITE3FAD,nd3FAD );
1441 inc_nrnb(nrnb,eNR_VSITE3OUT,nd3OUT );
1442 inc_nrnb(nrnb,eNR_VSITE4FD, nd4FD );
1443 inc_nrnb(nrnb,eNR_VSITE4FDN,nd4FDN );
1444 inc_nrnb(nrnb,eNR_VSITEN, ndN );
1446 if (DOMAINDECOMP(cr)) {
1447 dd_move_f_vsites(cr->dd,f,fshift);
1448 } else if (vsite->bPDvsitecomm) {
1449 /* We only move forces here, and they are independent of shifts */
1450 move_construct_f(vsite->vsitecomm,f,cr);
1454 static int *atom2cg(t_block *cgs)
1458 snew(a2cg,cgs->index[cgs->nr]);
1459 for(cg=0; cg<cgs->nr; cg++) {
1460 for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++)
1467 static int count_intercg_vsite(gmx_mtop_t *mtop)
1469 int mb,mt,ftype,nral,i,cg,a;
1470 gmx_molblock_t *molb;
1471 gmx_moltype_t *molt;
1475 int n_intercg_vsite;
1477 n_intercg_vsite = 0;
1478 for(mb=0; mb<mtop->nmolblock; mb++) {
1479 molb = &mtop->molblock[mb];
1480 molt = &mtop->moltype[molb->type];
1481 a2cg = atom2cg(&molt->cgs);
1482 for(ftype=0; ftype<F_NRE; ftype++) {
1483 if (interaction_function[ftype].flags & IF_VSITE) {
1485 il = &molt->ilist[ftype];
1487 for(i=0; i<il->nr; i+=1+nral) {
1489 for(a=1; a<nral; a++) {
1490 if (a2cg[ia[1+a]] != cg) {
1491 n_intercg_vsite += molb->nmol;
1501 return n_intercg_vsite;
1504 static int **get_vsite_pbc(t_iparams *iparams,t_ilist *ilist,
1505 t_atom *atom,t_mdatoms *md,
1506 t_block *cgs,int *a2cg)
1508 int ftype,nral,i,j,vsi,vsite,cg_v,cg_c,a,nc3=0;
1511 int **vsite_pbc,*vsite_pbc_f;
1513 gmx_bool bViteOnlyCG_and_FirstAtom;
1515 /* Make an array that tells if the pbc of an atom is set */
1516 snew(pbc_set,cgs->index[cgs->nr]);
1517 /* PBC is set for all non vsites */
1518 for(a=0; a<cgs->index[cgs->nr]; a++) {
1519 if ((atom && atom[a].ptype != eptVSite) ||
1520 (md && md->ptype[a] != eptVSite)) {
1525 snew(vsite_pbc,F_VSITEN-F_VSITE2+1);
1527 for(ftype=0; ftype<F_NRE; ftype++) {
1528 if (interaction_function[ftype].flags & IF_VSITE) {
1533 snew(vsite_pbc[ftype-F_VSITE2],il->nr/(1+nral));
1534 vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
1537 while (i < il->nr) {
1541 /* A value of -2 signals that this vsite and its contructing
1542 * atoms are all within the same cg, so no pbc is required.
1544 vsite_pbc_f[vsi] = -2;
1545 /* Check if constructing atoms are outside the vsite's cg */
1547 if (ftype == F_VSITEN) {
1548 nc3 = 3*iparams[ia[i]].vsiten.n;
1549 for(j=0; j<nc3; j+=3) {
1550 if (a2cg[ia[i+j+2]] != cg_v)
1551 vsite_pbc_f[vsi] = -1;
1554 for(a=1; a<nral; a++) {
1555 if (a2cg[ia[i+1+a]] != cg_v)
1556 vsite_pbc_f[vsi] = -1;
1559 if (vsite_pbc_f[vsi] == -1) {
1560 /* Check if this is the first processed atom of a vsite only cg */
1561 bViteOnlyCG_and_FirstAtom = TRUE;
1562 for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1563 /* Non-vsites already have pbc set, so simply check for pbc_set */
1565 bViteOnlyCG_and_FirstAtom = FALSE;
1569 if (bViteOnlyCG_and_FirstAtom) {
1570 /* First processed atom of a vsite only charge group.
1571 * The pbc of the input coordinates to construct_vsites
1572 * should be preserved.
1574 vsite_pbc_f[vsi] = vsite;
1575 } else if (cg_v != a2cg[ia[1+i+1]]) {
1576 /* This vsite has a different charge group index
1577 * than it's first constructing atom
1578 * and the charge group has more than one atom,
1579 * search for the first normal particle
1580 * or vsite that already had its pbc defined.
1581 * If nothing is found, use full pbc for this vsite.
1583 for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1584 if (a != vsite && pbc_set[a]) {
1585 vsite_pbc_f[vsi] = a;
1587 fprintf(debug,"vsite %d match pbc with atom %d\n",
1593 fprintf(debug,"vsite atom %d cg %d - %d pbc atom %d\n",
1594 vsite+1,cgs->index[cg_v]+1,cgs->index[cg_v+1],
1595 vsite_pbc_f[vsi]+1);
1598 if (ftype == F_VSITEN) {
1599 /* The other entries in vsite_pbc_f are not used for center vsites */
1605 /* This vsite now has its pbc defined */
1616 gmx_vsite_t *init_vsite(gmx_mtop_t *mtop,t_commrec *cr)
1622 gmx_moltype_t *molt;
1624 /* check if there are vsites */
1626 for(i=0; i<F_NRE; i++) {
1627 if (interaction_function[i].flags & IF_VSITE) {
1628 nvsite += gmx_mtop_ftype_count(mtop,i);
1638 vsite->n_intercg_vsite = count_intercg_vsite(mtop);
1640 if (vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr)) {
1641 vsite->nvsite_pbc_molt = mtop->nmoltype;
1642 snew(vsite->vsite_pbc_molt,vsite->nvsite_pbc_molt);
1643 for(mt=0; mt<mtop->nmoltype; mt++) {
1644 molt = &mtop->moltype[mt];
1645 /* Make an atom to charge group index */
1646 a2cg = atom2cg(&molt->cgs);
1647 vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
1649 molt->atoms.atom,NULL,
1654 snew(vsite->vsite_pbc_loc_nalloc,F_VSITEN-F_VSITE2+1);
1655 snew(vsite->vsite_pbc_loc ,F_VSITEN-F_VSITE2+1);
1662 void set_vsite_top(gmx_vsite_t *vsite,gmx_localtop_t *top,t_mdatoms *md,
1667 /* Make an atom to charge group index */
1668 a2cg = atom2cg(&top->cgs);
1670 if (vsite->n_intercg_vsite > 0) {
1671 vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
1672 top->idef.il,NULL,md,
1675 if (PARTDECOMP(cr)) {
1676 snew(vsite->vsitecomm,1);
1677 vsite->bPDvsitecomm =
1678 setup_parallel_vsites(&(top->idef),cr,vsite->vsitecomm);