1 #include "types/simple.h"
3 #undef FULL_INTERACTION
4 #undef FULL_INTERACTION_
5 #undef COUL_INTERACTION
6 #undef COUL_INTERACTION_
10 #define FULL_INTERACTION nfcalc_interaction
11 #define FULL_INTERACTION_ nfcalc_interaction_
12 #define COUL_INTERACTION nfcalc_coul_interaction
13 #define COUL_INTERACTION_ nfcalc_coul_interaction_
17 #define FULL_INTERACTION calc_interaction
18 #define FULL_INTERACTION_ calc_interaction_
19 #define COUL_INTERACTION calc_coul_interaction
20 #define COUL_INTERACTION_ calc_coul_interaction_
57 double _Complex zero = __cmplx(0.0,0.0);
58 double _Complex rone = __cmplx(1.0,0.0);
59 double _Complex three = __cmplx(3.0,3.0);
60 double conv1[2],conv2[2],conv3[2];
62 real _facel,_tabscale,_gbtabscale,_krf,_crf;
63 int nri,ntype,nthreads,n,ii,nj0,nj1;
65 #pragma disjoint(*shiftvec,*fshift,*pos,*faction,*charge,*p_facel,*p_krf,*p_crf,*Vc, \
66 *vdwparam,*Vvdw,*p_tabscale,*VFtab,*invsqrta,*dvda,*p_gbtabscale,*GBtab,*work)
70 nthreads = *p_nthreads;
72 #if (COULOMB == COULOMB_TAB || VDW == VDW_TAB)
73 _tabscale = *p_tabscale;
77 #if COULOMB == REACTION_FIELD
84 #if COULOMB == GENERALIZED_BORN
85 _gbtabscale = *p_gbtabscale;
91 for(n=0; (n<nri); n++)
93 double _Complex ix,iy,iz;
95 // initialize potential energies and forces for this paricle
97 double _Complex vctot = zero;
98 double _Complex Vvdwtot = zero;
99 double _Complex dvdasum = zero;
100 double _Complex fix = zero;
101 double _Complex fiy = zero;
102 double _Complex fiz = zero;
104 // shift is the position of the n-th water group
106 int is3 = 3*shift[n];
108 // shiftvec is the center of a water group
110 real _shX = shiftvec[is3];
111 real _shY = shiftvec[is3+1];
112 real _shZ = shiftvec[is3+2];
116 #if VDW == BUCKINGHAM
117 int nti = 3*ntype*type[ii];
119 int nti = 2*ntype*type[ii];
123 real _iq = _facel * charge[ii];
124 #if COULOMB == GENERALIZED_BORN
125 real _isai = invsqrta[ii];
130 // add the shift vector to all water atoms
132 real _ix = _shX + pos[ii3+0];
133 real _iy = _shY + pos[ii3+1];
134 real _iz = _shZ + pos[ii3+2];
136 ix = __cmplx(_ix,_ix);
137 iy = __cmplx(_iy,_iy);
138 iz = __cmplx(_iz,_iz);
143 /* unrolled 6 times : unrolled twice for SIMDization and three times to hide dependencies */
145 for(k=nj0; (k<nj1-5); k+=6)
147 double _Complex dx1,dy1,dz1,fjx1,fjy1,fjz1;
148 double _Complex rsq1,rinv1;
149 double _Complex dx2,dy2,dz2,fjx2,fjy2,fjz2;
150 double _Complex rsq2,rinv2;
151 double _Complex dx3,dy3,dz3,fjx3,fjy3,fjz3;
152 double _Complex rsq3,rinv3;
154 double _Complex rinvsq,krsq,fscal;
155 double _Complex rt,rti,r,eps,Y,F,G,H,GHeps,VV,FF,fijC,fijD,n0,qq;
156 double _Complex isaprod,isaj,iqq,gbscale,dvdaj,vcoul,dvdatmp;
157 double _Complex c6,c12,cexp1,cexp2,rinvsix,Vvdw6,Vvdw12,Vvdwexp;
160 int jnr11,jnr21,jnr12,jnr22,jnr13,jnr23,j11,j21,j12,j22,j13,j23,tj1,tj2;
176 load6_and_merge(pos,j11,j21,dx1,dy1,dz1);
177 load6_and_merge(pos,j12,j22,dx2,dy2,dz2);
178 load6_and_merge(pos,j13,j23,dx3,dy3,dz3);
180 dx1 = __fpsub(ix,dx1);
181 dy1 = __fpsub(iy,dy1);
182 dz1 = __fpsub(iz,dz1);
184 dx2 = __fpsub(ix,dx2);
185 dy2 = __fpsub(iy,dy2);
186 dz2 = __fpsub(iz,dz2);
188 dx3 = __fpsub(ix,dx3);
189 dy3 = __fpsub(iy,dy3);
190 dz3 = __fpsub(iz,dz3);
192 #if COULOMB != COULOMB_NONE
193 qq = __fxpmul(__cmplx(charge[jnr11],charge[jnr21]),_iq);
195 #if COULOMB == GENERALIZED_BORN
196 dvdaj = __cmplx(dvda[jnr11],dvda[jnr21]);
200 rsq1 = vdist2(dx1,dy1,dz1);
201 rsq2 = vdist2(dx2,dy2,dz2);
202 rsq3 = vdist2(dx3,dy3,dz3);
204 rinv1 = __fprsqrte(rsq1);
205 rinv2 = __fprsqrte(rsq2);
206 rinv3 = __fprsqrte(rsq3);
208 rinv1 = sqrt_newton(rinv1,rsq1);
209 rinv2 = sqrt_newton(rinv2,rsq2);
210 rinv3 = sqrt_newton(rinv3,rsq3);
214 rinv1 = sqrt_newton(rinv1,rsq1);
215 rinv2 = sqrt_newton(rinv2,rsq2);
216 rinv3 = sqrt_newton(rinv3,rsq3);
221 load6_and_merge(faction,j11,j21,fjx1,fjy1,fjz1);
224 FULL_INTERACTION(qq,rinv1,rsq1,conv1,jnr11,jnr21);
226 #if COULOMB != COULOMB_NONE
227 qq = __fxpmul(__cmplx(charge[jnr12],charge[jnr22]),_iq);
229 #if COULOMB == GENERALIZED_BORN
230 dvda[jnr11] -= __creal(dvdaj);
231 dvda[jnr21] -= __cimag(dvdaj);
233 dvdaj = __cmplx(dvda[jnr12],dvda[jnr22]);
238 fix = __fpnmsub(fix,dx1,fscal);
239 fiy = __fpnmsub(fiy,dy1,fscal);
240 fiz = __fpnmsub(fiz,dz1,fscal);
242 fjx1 = __fpmadd(fjx1,dx1,fscal);
243 fjy1 = __fpmadd(fjy1,dy1,fscal);
244 fjz1 = __fpmadd(fjz1,dz1,fscal);
246 load6_and_merge(faction,j12,j22,fjx2,fjy2,fjz2);
249 FULL_INTERACTION(qq,rinv2,rsq2,conv2,jnr12,jnr22);
251 #if COULOMB != COULOMB_NONE
252 qq = __fxpmul(__cmplx(charge[jnr13],charge[jnr23]),_iq);
254 #if COULOMB == GENERALIZED_BORN
255 dvda[jnr12] -= __creal(dvdaj);
256 dvda[jnr22] -= __cimag(dvdaj);
258 dvdaj = __cmplx(dvda[jnr13],dvda[jnr23]);
263 split_and_store6(faction,j11,j21,fjx1,fjy1,fjz1);
265 fix = __fpnmsub(fix,dx2,fscal);
266 fiy = __fpnmsub(fiy,dy2,fscal);
267 fiz = __fpnmsub(fiz,dz2,fscal);
269 fjx2 = __fpmadd(fjx2,dx2,fscal);
270 fjy2 = __fpmadd(fjy2,dy2,fscal);
271 fjz2 = __fpmadd(fjz2,dz2,fscal);
273 load6_and_merge(faction,j13,j23,fjx3,fjy3,fjz3);
276 FULL_INTERACTION(qq,rinv3,rsq3,conv3,jnr13,jnr23);
278 #if COULOMB == GENERALIZED_BORN
279 dvda[jnr13] -= __creal(dvdaj);
280 dvda[jnr23] -= __cimag(dvdaj);
284 split_and_store6(faction,j12,j22,fjx2,fjy2,fjz2);
286 fix = __fpnmsub(fix,dx3,fscal);
287 fiy = __fpnmsub(fiy,dy3,fscal);
288 fiz = __fpnmsub(fiz,dz3,fscal);
290 fjx3 = __fpmadd(fjx3,dx3,fscal);
291 fjy3 = __fpmadd(fjy3,dy3,fscal);
292 fjz3 = __fpmadd(fjz3,dz3,fscal);
294 split_and_store6(faction,j13,j23,fjx3,fjy3,fjz3);
300 real _dx,_dy,_dz,_rsq,_rinv;
301 real _rinvsq,_krsq,_fscal;
302 real _r,_rt,_eps,_Y,_F,_G,_H,_GHeps,_VV,_FF,_fijC,_fijD,_n0,_qq;
303 real _isaprod,_isaj,_iqq,_dvdatmp,_vcoul,_gbscale;
304 real _c6,_c12,_cexp1,_cexp2,_rinvsix,_Vvdw6,_Vvdw12,_Vvdwexp;
312 _dx = __creal(ix) - pos[j3+0];
313 _dy = __creal(iy) - pos[j3+1];
314 _dz = __creal(iz) - pos[j3+2];
316 _qq = _iq * charge[jnr];
318 _rsq = _dx*_dx + _dy*_dy + _dz*_dz;
320 _rinv = __frsqrte(_rsq);
322 _rinv = sqrt_newton_scalar(_rinv,_rsq);
326 _rinv = sqrt_newton_scalar(_rinv,_rsq);
329 FULL_INTERACTION_(_qq,_rinv,_rsq,jnr);
335 faction[j3+0] += _fscal*_dx;
336 faction[j3+1] += _fscal*_dy;
337 faction[j3+2] += _fscal*_dz;
344 sum_and_add3(faction,ii3,fix,fiy,fiz);
345 sum_and_add3(fshift ,is3,fix,fiy,fiz);
348 #if COULOMB != COULOMB_NONE
349 Vc[ggid] += __creal(vctot) + __cimag(vctot);
350 #if COULOMB == GENERALIZED_BORN
351 dvda[ii] += __creal(dvdasum) + __cimag(dvdasum);
356 Vvdw[ggid] += __creal(Vvdwtot) + __cimag(Vvdwtot);