1 #include "types/simple.h"
3 #undef FULL_INTERACTION
4 #undef FULL_INTERACTION_
5 #undef COUL_INTERACTION
6 #undef COUL_INTERACTION_
10 #define FULL_INTERACTION nfcalc_interaction
11 #define FULL_INTERACTION_ nfcalc_interaction_
12 #define COUL_INTERACTION nfcalc_coul_interaction
13 #define COUL_INTERACTION_ nfcalc_coul_interaction_
17 #define FULL_INTERACTION calc_interaction
18 #define FULL_INTERACTION_ calc_interaction_
19 #define COUL_INTERACTION calc_coul_interaction
20 #define COUL_INTERACTION_ calc_coul_interaction_
58 double _Complex qO,qH;
59 double _Complex zero = __cmplx(0.0,0.0);
60 double _Complex rone = __cmplx(1.0,0.0);
61 double _Complex three = __cmplx(3.0,3.0);
62 double conv1[2],conv2[2],conv3[2];
64 real _qO,_qH,_facel,_tabscale,_gbtabscale,_krf,_crf;
66 int nri,ntype,nthreads,n,ii,nj0,nj1,nti;
68 #pragma disjoint(*shiftvec,*fshift,*pos,*faction,*charge,*p_facel,*p_krf,*p_crf,*Vc, \
69 *vdwparam,*Vvdw,*p_tabscale,*VFtab,*invsqrta,*dvda,*p_gbtabscale,*GBtab,*work)
73 nthreads = *p_nthreads;
75 #if (COULOMB == COULOMB_TAB || VDW == VDW_TAB)
76 _tabscale = *p_tabscale;
80 #if COULOMB == REACTION_FIELD
87 #if COULOMB == GENERALIZED_BORN
88 _gbtabscale = *p_gbtabscale;
94 _qO = _facel * charge[ii];
95 _qH = _facel * charge[ii+1];
96 qO = __cmplx(_qO,_qO);
97 qH = __cmplx(_qH,_qH);
99 nti = 3*ntype*type[ii];
101 nti = 2*ntype*type[ii];
106 for(n=0; (n<nri); n++)
108 double _Complex ix1,ix2,ix3,iy1,iy2,iy3,iz1,iz2,iz3,ix23,iy23,iz23,fix,fiy,fiz;
110 // initialize potential energies and forces for this paricle
112 double _Complex vctot = zero;
113 double _Complex Vvdwtot = zero;
114 double _Complex fix1 = zero;
115 double _Complex fix2 = zero;
116 double _Complex fix3 = zero;
117 double _Complex fiy1 = zero;
118 double _Complex fiy2 = zero;
119 double _Complex fiy3 = zero;
120 double _Complex fiz1 = zero;
121 double _Complex fiz2 = zero;
122 double _Complex fiz3 = zero;
124 // shift is the position of the n-th water group
126 int is3 = 3*shift[n];
128 // shiftvec is the center of a water group
130 real _shX = shiftvec[is3+0];
131 real _shY = shiftvec[is3+1];
132 real _shZ = shiftvec[is3+2];
138 // add the shift vector to all water atoms
140 real _ix1 = _shX + pos[ii3+0];
141 real _iy1 = _shY + pos[ii3+1];
142 real _iz1 = _shZ + pos[ii3+2];
143 real _ix2 = _shX + pos[ii3+3];
144 real _iy2 = _shY + pos[ii3+4];
145 real _iz2 = _shZ + pos[ii3+5];
146 real _ix3 = _shX + pos[ii3+6];
147 real _iy3 = _shY + pos[ii3+7];
148 real _iz3 = _shZ + pos[ii3+8];
150 // clone all positions in complex variables
152 ix1 = __cmplx(_ix1,_ix1);
153 iy1 = __cmplx(_iy1,_iy1);
154 iz1 = __cmplx(_iz1,_iz1);
156 ix2 = __cmplx(_ix2,_ix2);
157 iy2 = __cmplx(_iy2,_iy2);
158 iz2 = __cmplx(_iz2,_iz2);
160 ix3 = __cmplx(_ix3,_ix3);
161 iy3 = __cmplx(_iy3,_iy3);
162 iz3 = __cmplx(_iz3,_iz3);
167 // unrolled twice for SIMDization
169 for(k=nj0; (k<nj1-1); k+=2)
171 double _Complex dx11,dx21,dx31,jx,fjx;
172 double _Complex dy11,dy21,dy31,jy,fjy;
173 double _Complex dz11,dz21,dz31,jz,fjz;
174 double _Complex rsq11,rsq21,rsq31;
175 double _Complex rinv11,rinv21,rinv31;
176 double _Complex rinvsq,krsq,fscal;
177 double _Complex rt,rti,r,n0,eps,Y,F,G,H,GHeps,VV,FF,fijC,fijD,qj,qq;
178 double _Complex c6,c12,cexp1,cexp2,Vvdwexp,Vvdw6,Vvdw12,rinvsix;
181 int jnr1,jnr2,j1,j2,tj1,tj2;
189 load6_and_merge(pos,j1,j2,jx,jy,jz);
191 dx11 = __fpsub(ix1,jx);
192 dy11 = __fpsub(iy1,jy);
193 dz11 = __fpsub(iz1,jz);
194 dx21 = __fpsub(ix2,jx);
195 dy21 = __fpsub(iy2,jy);
196 dz21 = __fpsub(iz2,jz);
197 dx31 = __fpsub(ix3,jx);
198 dy31 = __fpsub(iy3,jy);
199 dz31 = __fpsub(iz3,jz);
201 qj = __cmplx(charge[jnr1],charge[jnr2]);
203 rsq11 = vdist2(dx11,dy11,dz11);
204 rsq21 = vdist2(dx21,dy21,dz21);
205 rsq31 = vdist2(dx31,dy31,dz31);
207 rinv11 = __fprsqrte(rsq11);
208 rinv21 = __fprsqrte(rsq21);
209 rinv31 = __fprsqrte(rsq31);
211 rinv11 = sqrt_newton(rinv11,rsq11);
212 rinv21 = sqrt_newton(rinv21,rsq21);
213 rinv31 = sqrt_newton(rinv31,rsq31);
217 rinv11 = sqrt_newton(rinv11,rsq11);
218 rinv21 = sqrt_newton(rinv21,rsq21);
219 rinv31 = sqrt_newton(rinv31,rsq31);
225 load6_and_merge(faction,j1,j2,fjx,fjy,fjz);
228 FULL_INTERACTION(qq,rinv11,rsq11,conv1,jnr1,jnr2);
233 fix1 = __fpnmsub(fix1,dx11,fscal);
234 fiy1 = __fpnmsub(fiy1,dy11,fscal);
235 fiz1 = __fpnmsub(fiz1,dz11,fscal);
237 fjx = __fpmadd(fjx,dx11,fscal);
238 fjy = __fpmadd(fjy,dy11,fscal);
239 fjz = __fpmadd(fjz,dz11,fscal);
242 COUL_INTERACTION(qq,rinv21,rsq21,conv2,jnr1,jnr2);
245 fix2 = __fpnmsub(fix2,dx21,fscal);
246 fiy2 = __fpnmsub(fiy2,dy21,fscal);
247 fiz2 = __fpnmsub(fiz2,dz21,fscal);
249 fjx = __fpmadd(fjx,dx21,fscal);
250 fjy = __fpmadd(fjy,dy21,fscal);
251 fjz = __fpmadd(fjz,dz21,fscal);
254 COUL_INTERACTION(qq,rinv31,rsq31,conv3,jnr1,jnr2);
257 fix3 = __fpnmsub(fix3,dx31,fscal);
258 fiy3 = __fpnmsub(fiy3,dy31,fscal);
259 fiz3 = __fpnmsub(fiz3,dz31,fscal);
261 fjx = __fpmadd(fjx,dx31,fscal);
262 fjy = __fpmadd(fjy,dy31,fscal);
263 fjz = __fpmadd(fjz,dz31,fscal);
265 split_and_store6(faction,j1,j2,fjx,fjy,fjz);
269 ix23 = __cmplx(_ix2,_ix3);
270 iy23 = __cmplx(_iy2,_iy3);
271 iz23 = __cmplx(_iz2,_iz3);
274 // actually we should not simdize the remainder loop, because it's slower
278 double _Complex dx23,dy23,dz23,tx,ty,tz,jx,jy,jz;
279 double _Complex fjx,fjy,fjz;
280 double _Complex rsq23,rinv23;
281 double _Complex rinvsq,krsq,fscal;
282 double _Complex rt,rti,r,eps,Y,F,G,H,GHeps,VV,FF,fijC,fijD,n0,qq;
284 real _dx11,_dy11,_dz11,_rsq11,_rinv11;
285 real _rinvsq,_krsq,_fscal;
286 real _rt,_r,_eps,_Y,_F,_H,_G,_GHeps,_VV,_FF,_fijC,_fijD,_n0,_qq;
287 real _qj,_c6,_c12,_cexp1,_cexp2,_Vvdwexp,_Vvdw6,_Vvdw12,_rinvsix;
295 load3_and_clone(pos,j3,jx,jy,jz);
297 _dx11 = __creal(ix1) - __creal(jx);
298 _dy11 = __creal(iy1) - __creal(jy);
299 _dz11 = __creal(iz1) - __creal(jz);
300 dx23 = __fpsub(ix23,jx);
301 dy23 = __fpsub(iy23,jy);
302 dz23 = __fpsub(iz23,jz);
304 _rsq11 = _dx11 * _dx11 + _dy11 * _dy11 + _dz11 * _dz11;
305 rsq23 = vdist2(dx23,dy23,dz23);
307 _rinv11 = __frsqrte(_rsq11);
308 rinv23 = __fprsqrte(rsq23 );
310 _rinv11 = sqrt_newton_scalar(_rinv11,_rsq11);
311 rinv23 = sqrt_newton(rinv23,rsq23);
315 _rinv11 = sqrt_newton_scalar(_rinv11,_rsq11);
316 rinv23 = sqrt_newton(rinv23,rsq23);
320 _qq = _qO * charge[jnr];
323 load3_real(faction,j3,fjx,fjy,fjz);
326 FULL_INTERACTION_(_qq,_rinv11,_rsq11,jnr);
328 qq = __fxpmul(qH,charge[jnr]);
331 fix1 -= _fscal * _dx11;
332 fiy1 -= _fscal * _dy11;
333 fiz1 -= _fscal * _dz11;
335 fjx += _fscal * _dx11;
336 fjy += _fscal * _dy11;
337 fjz += _fscal * _dz11;
340 COUL_INTERACTION(qq,rinv23,rsq23,conv1,jnr,jnr);
343 tx = __fpmul(fscal,dx23);
344 ty = __fpmul(fscal,dy23);
345 tz = __fpmul(fscal,dz23);
347 fix2 = __fpnmsub(fix2,rone,tx);
348 fiy2 = __fpnmsub(fiy2,rone,ty);
349 fiz2 = __fpnmsub(fiz2,rone,tz);
350 fix3 = __fxnmsub(fix3,rone,tx);
351 fiy3 = __fxnmsub(fiy3,rone,ty);
352 fiz3 = __fxnmsub(fiz3,rone,tz);
354 fjx = __fpadd(fjx,tx);
355 fjy = __fpadd(fjy,ty);
356 fjz = __fpadd(fjz,tz);
358 sum_and_store3(faction,j3,fjx,fjy,fjz);
365 sum_and_add3(faction,ii3 ,fix1,fiy1,fiz1);
366 sum_and_add3(faction,ii3+3,fix2,fiy2,fiz2);
367 sum_and_add3(faction,ii3+6,fix3,fiy3,fiz3);
369 fix = __fpadd(fix1,__fpadd(fix2,fix3));
370 fiy = __fpadd(fiy1,__fpadd(fiy2,fiy3));
371 fiz = __fpadd(fiz1,__fpadd(fiz2,fiz3));
373 sum_and_add3(fshift,is3,fix,fiy,fiz);
376 #if COULOMB != COULOMB_NONE
377 Vc[ggid] += __creal(vctot) + __cimag(vctot);
381 Vvdw[ggid] += __creal(Vvdwtot) + __cimag(Vvdwtot);