2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Lower threshold that the kernels in this file compare weight_product against. */
23 #define ALMOST_ZERO 1e-30
/*
 * Upper threshold that the kernels in this file compare weight_product against.
 * NOTE(review): the expansion is not wrapped in parentheses, so it is only safe
 * in contexts such as `x >= ALMOST_ONE`; multiplying or negating the macro
 * would bind to the leading 1 only.
 * NOTE(review): 1e-30 is far below double-precision epsilon, so this value
 * presumably evaluates to exactly 1.0 - confirm that this is intended.
 */
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel133_adress.h"
32 * Gromacs nonbonded kernel nb_kernel133_adress_cg
33 * Coulomb interaction: Normal Coulomb
34 * VdW interaction: Tabulated
35 * water optimization: TIP4P - other atoms
36 * Calculate forces: yes
/*
 * nb_kernel133_adress_cg: computes interactions between a four-site i-molecule
 * (sites 1..4) and single j-atoms, accumulating forces and energies.
 * NOTE(review): only part of this routine is visible in this listing (the
 * parameter list and many statements are missing); the comments below describe
 * the visible lines only.
 */
38 void nb_kernel133_adress_cg(
/* Loop counters, index variables and work-range bounds. */
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Temporaries for the table lookup. */
87 real Y,F,Geps,Heps2,Fp,VV;
/* Coordinates and accumulated forces for i-sites 1..4 and for the j-atom. */
90 real ix1,iy1,iz1,fix1,fiy1,fiz1;
91 real ix2,iy2,iz2,fix2,fiy2,fiz2;
92 real ix3,iy3,iz3,fix3,fiy3,fiz3;
93 real ix4,iy4,iz4,fix4,fiy4,fiz4;
94 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
/* Displacement components, squared distance and inverse distance per site pair. */
95 real dx11,dy11,dz11,rsq11,rinv11;
96 real dx21,dy21,dz21,rsq21,rinv21;
97 real dx31,dy31,dz31,rsq31,rinv31;
98 real dx41,dy41,dz41,rsq41,rinv41;
/* The two per-particle weights and their product. */
101 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar inputs that are passed by pointer into locals. */
106 nthreads = *p_nthreads;
110 tabscale = *p_tabscale;
/* Pre-scale the charges at offsets ii+1 and ii+3 by facel. */
112 qH = facel*charge[ii+1];
113 qM = facel*charge[ii+3];
/* Base offset into vdwparam for the i type; entries are addressed with a stride of 2. */
114 nti = 2*ntype*type[ii];
/* Threaded build: the work range is computed while holding the mutex mtx. */
121 #ifdef GMX_THREAD_SHM_FDECOMP
122 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* Range end: a 1/(2*nthreads) share of the remaining outer iterations, plus 10. */
124 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
126 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over the range [nn0, nn1). */
133 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector at offset is3. */
137 shY = shiftvec[is3+1];
138 shZ = shiftvec[is3+2];
/* Shifted coordinates of the four i-sites: 12 consecutive pos entries starting at ii3. */
143 ix1 = shX + pos[ii3+0];
144 iy1 = shY + pos[ii3+1];
145 iz1 = shZ + pos[ii3+2];
146 ix2 = shX + pos[ii3+3];
147 iy2 = shY + pos[ii3+4];
148 iz2 = shZ + pos[ii3+5];
149 ix3 = shX + pos[ii3+6];
150 iy3 = shY + pos[ii3+7];
151 iz3 = shZ + pos[ii3+8];
152 ix4 = shX + pos[ii3+9];
153 iy4 = shY + pos[ii3+10];
154 iz4 = shZ + pos[ii3+11];
/* Inner loop over k in [nj0, nj1). */
171 for(k=nj0; (k<nj1); k++)
/* Weight of the j-atom and its product with weight_cg1. */
174 weight_cg2 = wf[jnr];
175 weight_product = weight_cg1*weight_cg2;
/* Branch on the weight product; the branch bodies are only partly visible in this listing. */
176 if (weight_product < ALMOST_ZERO) {
179 else if (weight_product >= ALMOST_ONE)
181 /* force is zero, skip this molecule */
/* This kernel scales by the complement of the weight product (presumably the intermediate-weight case - confirm). */
186 hybscal = 1.0 - weight_product;
/* Squared distance from each i-site to the j-atom. */
195 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
199 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
203 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
207 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
/* Inverse distances, computed with sqrt and a division. */
208 rinv11 = 1.0/sqrt(rsq11);
209 rinv21 = 1.0/sqrt(rsq21);
210 rinv31 = 1.0/sqrt(rsq31);
211 rinv41 = 1.0/sqrt(rsq41);
/* Index into vdwparam for this i/j type pair; c12 is the entry at tj+1. */
212 tj = nti+2*type[jnr];
214 c12 = vdwparam[tj+1];
/*
 * Table terms: entries nnn+2 and nnn+3 are scaled by eps and eps2, and FF
 * combines them as Fp+Geps+2*Heps2. The pattern appears twice; presumably
 * once per table section feeding fijD/Vvdw6 and fijR/Vvdw12 - confirm.
 */
223 Geps = eps*VFtab[nnn+2];
224 Heps2 = eps2*VFtab[nnn+3];
227 FF = Fp+Geps+2.0*Heps2;
233 Geps = eps*VFtab[nnn+2];
234 Heps2 = eps2*VFtab[nnn+3];
237 FF = Fp+Geps+2.0*Heps2;
/* Accumulate the VdW energy; scalar force for pair 1-1 from both table terms. */
240 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
241 fscal = -((fijD+fijR)*tabscale)*rinv11;
/* Start the j-atom force from its stored value minus the (tx,ty,tz) contribution. */
249 fjx1 = faction[j3+0] - tx;
250 fjy1 = faction[j3+1] - ty;
251 fjz1 = faction[j3+2] - tz;
/* Sites 2..4: scalar force is vcoul times the squared inverse distance. */
254 rinvsq = rinv21*rinv21;
257 fscal = (vcoul)*rinvsq;
268 rinvsq = rinv31*rinv31;
271 fscal = (vcoul)*rinvsq;
283 rinvsq = rinv41*rinv41;
286 fscal = (vcoul)*rinvsq;
/* Store the j-atom force, subtracting the latest (tx,ty,tz) contribution. */
294 faction[j3+0] = fjx1 - tx;
295 faction[j3+1] = fjy1 - ty;
296 faction[j3+2] = fjz1 - tz;
/* Add the accumulated forces of the four i-sites to the force array. */
299 faction[ii3+0] = faction[ii3+0] + fix1;
300 faction[ii3+1] = faction[ii3+1] + fiy1;
301 faction[ii3+2] = faction[ii3+2] + fiz1;
302 faction[ii3+3] = faction[ii3+3] + fix2;
303 faction[ii3+4] = faction[ii3+4] + fiy2;
304 faction[ii3+5] = faction[ii3+5] + fiz2;
305 faction[ii3+6] = faction[ii3+6] + fix3;
306 faction[ii3+7] = faction[ii3+7] + fiy3;
307 faction[ii3+8] = faction[ii3+8] + fiz3;
308 faction[ii3+9] = faction[ii3+9] + fix4;
309 faction[ii3+10] = faction[ii3+10] + fiy4;
310 faction[ii3+11] = faction[ii3+11] + fiz4;
/* Add the summed i-site forces to the shift-force entry at is3. */
311 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
312 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
313 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* Add the accumulated energies to the slots selected by ggid. */
315 Vc[ggid] = Vc[ggid] + vctot;
316 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Update the inner and outer iteration counters. */
317 ninner = ninner + nj1 - nj0;
320 nouter = nouter + nn1 - nn0;
333 * Gromacs nonbonded kernel nb_kernel133_adress_ex
334 * Coulomb interaction: Normal Coulomb
335 * VdW interaction: Tabulated
336 * water optimization: TIP4P - other atoms
337 * Calculate forces: yes
/*
 * nb_kernel133_adress_ex: same visible structure as the _cg kernel (four-site
 * i-molecule against single j-atoms), but the scaling factor is the weight
 * product itself and each scalar force is clamped by force_cap.
 * NOTE(review): only part of this routine is visible in this listing (the
 * parameter list and many statements are missing); the comments below describe
 * the visible lines only.
 */
339 void nb_kernel133_adress_ex(
/* Loop counters, index variables and work-range bounds. */
373 int nri,ntype,nthreads;
374 real facel,krf,crf,tabscale,gbtabscale;
375 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
376 int nn0,nn1,nouter,ninner;
/* Temporaries for the table lookup. */
388 real Y,F,Geps,Heps2,Fp,VV;
/* Coordinates and accumulated forces for i-sites 1..4 and for the j-atom. */
391 real ix1,iy1,iz1,fix1,fiy1,fiz1;
392 real ix2,iy2,iz2,fix2,fiy2,fiz2;
393 real ix3,iy3,iz3,fix3,fiy3,fiz3;
394 real ix4,iy4,iz4,fix4,fiy4,fiz4;
395 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
/* Displacement components, squared distance and inverse distance per site pair. */
396 real dx11,dy11,dz11,rsq11,rinv11;
397 real dx21,dy21,dz21,rsq21,rinv21;
398 real dx31,dy31,dz31,rsq31,rinv31;
399 real dx41,dy41,dz41,rsq41,rinv41;
/* The two per-particle weights and their product. */
402 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar inputs that are passed by pointer into locals. */
407 nthreads = *p_nthreads;
411 tabscale = *p_tabscale;
/* Pre-scale the charges at offsets ii+1 and ii+3 by facel. */
413 qH = facel*charge[ii+1];
414 qM = facel*charge[ii+3];
/* Base offset into vdwparam for the i type; entries are addressed with a stride of 2. */
415 nti = 2*ntype*type[ii];
/* Threaded build: the work range is computed while holding the mutex mtx. */
422 #ifdef GMX_THREAD_SHM_FDECOMP
423 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* Range end: a 1/(2*nthreads) share of the remaining outer iterations, plus 10. */
425 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
427 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over the range [nn0, nn1). */
434 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector at offset is3. */
438 shY = shiftvec[is3+1];
439 shZ = shiftvec[is3+2];
/* Shifted coordinates of the four i-sites: 12 consecutive pos entries starting at ii3. */
444 ix1 = shX + pos[ii3+0];
445 iy1 = shY + pos[ii3+1];
446 iz1 = shZ + pos[ii3+2];
447 ix2 = shX + pos[ii3+3];
448 iy2 = shY + pos[ii3+4];
449 iz2 = shZ + pos[ii3+5];
450 ix3 = shX + pos[ii3+6];
451 iy3 = shY + pos[ii3+7];
452 iz3 = shZ + pos[ii3+8];
453 ix4 = shX + pos[ii3+9];
454 iy4 = shY + pos[ii3+10];
455 iz4 = shZ + pos[ii3+11];
/* Inner loop over k in [nj0, nj1). */
472 for(k=nj0; (k<nj1); k++)
/* Weight of the j-atom and its product with weight_cg1. */
475 weight_cg2 = wf[jnr];
476 weight_product = weight_cg1*weight_cg2;
/* Branch on the weight product; the branch bodies are only partly visible in this listing. */
477 if (weight_product < ALMOST_ZERO) {
478 /* force is zero, skip this molecule */
481 else if (weight_product >= ALMOST_ONE)
/* This kernel scales by the weight product itself (presumably the intermediate-weight case - confirm). */
487 hybscal = weight_product;
/* Squared distance from each i-site to the j-atom. */
496 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
500 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
504 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
508 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
/* Inverse distances, computed with sqrt and a division. */
509 rinv11 = 1.0/sqrt(rsq11);
510 rinv21 = 1.0/sqrt(rsq21);
511 rinv31 = 1.0/sqrt(rsq31);
512 rinv41 = 1.0/sqrt(rsq41);
/* Index into vdwparam for this i/j type pair; c12 is the entry at tj+1. */
513 tj = nti+2*type[jnr];
515 c12 = vdwparam[tj+1];
/*
 * Table terms: entries nnn+2 and nnn+3 are scaled by eps and eps2, and FF
 * combines them as Fp+Geps+2*Heps2. The pattern appears twice; presumably
 * once per table section feeding fijD/Vvdw6 and fijR/Vvdw12 - confirm.
 */
524 Geps = eps*VFtab[nnn+2];
525 Heps2 = eps2*VFtab[nnn+3];
528 FF = Fp+Geps+2.0*Heps2;
534 Geps = eps*VFtab[nnn+2];
535 Heps2 = eps2*VFtab[nnn+3];
538 FF = Fp+Geps+2.0*Heps2;
/* Accumulate the VdW energy; scalar force for pair 1-1 from both table terms. */
541 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
542 fscal = -((fijD+fijR)*tabscale)*rinv11;
/* When force_cap is positive, limit the magnitude of fscal to force_cap and keep its sign. */
544 if(force_cap>0 && (fabs(fscal)> force_cap)){
545 fscal=force_cap*fscal/fabs(fscal);
/* Start the j-atom force from its stored value minus the (tx,ty,tz) contribution. */
553 fjx1 = faction[j3+0] - tx;
554 fjy1 = faction[j3+1] - ty;
555 fjz1 = faction[j3+2] - tz;
/* Site 2: scalar force is vcoul times the squared inverse distance, then the same clamp. */
558 rinvsq = rinv21*rinv21;
561 fscal = (vcoul)*rinvsq;
563 if(force_cap>0 && (fabs(fscal)> force_cap)){
564 fscal=force_cap*fscal/fabs(fscal);
/* Site 3: same computation and clamp. */
575 rinvsq = rinv31*rinv31;
578 fscal = (vcoul)*rinvsq;
580 if(force_cap>0 && (fabs(fscal)> force_cap)){
581 fscal=force_cap*fscal/fabs(fscal);
/* Site 4: same computation and clamp. */
593 rinvsq = rinv41*rinv41;
596 fscal = (vcoul)*rinvsq;
598 if(force_cap>0 && (fabs(fscal)> force_cap)){
599 fscal=force_cap*fscal/fabs(fscal);
/* Store the j-atom force, subtracting the latest (tx,ty,tz) contribution. */
607 faction[j3+0] = fjx1 - tx;
608 faction[j3+1] = fjy1 - ty;
609 faction[j3+2] = fjz1 - tz;
/* Add the accumulated forces of the four i-sites to the force array. */
612 faction[ii3+0] = faction[ii3+0] + fix1;
613 faction[ii3+1] = faction[ii3+1] + fiy1;
614 faction[ii3+2] = faction[ii3+2] + fiz1;
615 faction[ii3+3] = faction[ii3+3] + fix2;
616 faction[ii3+4] = faction[ii3+4] + fiy2;
617 faction[ii3+5] = faction[ii3+5] + fiz2;
618 faction[ii3+6] = faction[ii3+6] + fix3;
619 faction[ii3+7] = faction[ii3+7] + fiy3;
620 faction[ii3+8] = faction[ii3+8] + fiz3;
621 faction[ii3+9] = faction[ii3+9] + fix4;
622 faction[ii3+10] = faction[ii3+10] + fiy4;
623 faction[ii3+11] = faction[ii3+11] + fiz4;
/* Add the summed i-site forces to the shift-force entry at is3. */
624 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
625 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
626 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* Add the accumulated energies to the slots selected by ggid. */
628 Vc[ggid] = Vc[ggid] + vctot;
629 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Update the inner and outer iteration counters. */
630 ninner = ninner + nj1 - nj0;
633 nouter = nouter + nn1 - nn0;