2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/*
 * Thresholds compared against weight_product in the kernels of this file.
 *
 * The replacement list of ALMOST_ONE is parenthesized so that the macro
 * expands as a single operand next to any operator (e.g. x*ALMOST_ONE).
 * NOTE(review): in IEEE-754 double arithmetic 1-(1e-30) rounds to exactly
 * 1.0, so ">= ALMOST_ONE" behaves like ">= 1.0" - confirm this is intended.
 */
#define ALMOST_ZERO 1e-30
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel323_adress.h"
32 * Gromacs nonbonded kernel nb_kernel323_adress_cg
33 * Coulomb interaction: Tabulated
34 * VdW interaction: Buckingham
35 * water optimization: TIP4P - other atoms
36 * Calculate forces: yes
/*
 * Review note: only a sampled subset of this definition is visible here
 * (the parameter list, the braces and a number of statements are not shown),
 * so the comments below describe the visible statements only.
 *
 * In this "_cg" variant the visible scaling factor is
 * hybscal = 1.0 - weight_product, and the branch for
 * weight_product >= ALMOST_ONE carries the "force is zero, skip" remark.
 */
38 void nb_kernel323_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Geps, Heps2 and Fp feed the table term FF further down. */
87 real Y,F,Geps,Heps2,Fp,VV;
/* i*N: shifted coordinates of i-site N; fi*N: force accumulated on that site. */
91 real ix1,iy1,iz1,fix1,fiy1,fiz1;
92 real ix2,iy2,iz2,fix2,fiy2,fiz2;
93 real ix3,iy3,iz3,fix3,fiy3,fiz3;
94 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* fjx1/fjy1/fjz1 hold the j-atom force taken from faction[j3+0..2] and written back there. */
95 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
/* dxN1/dyN1/dzN1 components, their squared norm rsqN1, and rinvN1 = 1/sqrt(rsqN1). */
96 real dx11,dy11,dz11,rsq11,rinv11;
97 real dx21,dy21,dz21,rsq21,rinv21;
98 real dx31,dy31,dz31,rsq31,rinv31;
99 real dx41,dy41,dz41,rsq41,rinv41;
/* weight_cg2 is read from wf[jnr]; weight_product = weight_cg1*weight_cg2. */
102 real weight_cg1, weight_cg2, weight_product;
/* Scalar arguments arrive by pointer and are copied into locals. */
107 nthreads = *p_nthreads;
111 tabscale = *p_tabscale;
/* Charges at offsets ii+1 and ii+3 are pre-multiplied by facel. */
113 qH = facel*charge[ii+1];
114 qM = facel*charge[ii+3];
/* Base offset into vdwparam for the i-type; the factor 3 matches the stride-3 indexing through tj below. */
115 nti = 3*ntype*type[ii];
/* With GMX_THREAD_SHM_FDECOMP defined, mtx is locked around the computation of the outer-loop bound nn1. */
122 #ifdef GMX_THREAD_SHM_FDECOMP
123 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* nn1 lies (nri-nn0)/(2*nthreads)+10 entries past nn0. */
125 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
127 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop: n runs over [nn0, nn1). */
134 for(n=nn0; (n<nn1); n++)
138 shY = shiftvec[is3+1];
139 shZ = shiftvec[is3+2];
/* Shifted positions of the four i-sites: pos[ii3+0..11] plus (shX,shY,shZ). */
144 ix1 = shX + pos[ii3+0];
145 iy1 = shY + pos[ii3+1];
146 iz1 = shZ + pos[ii3+2];
147 ix2 = shX + pos[ii3+3];
148 iy2 = shY + pos[ii3+4];
149 iz2 = shZ + pos[ii3+5];
150 ix3 = shX + pos[ii3+6];
151 iy3 = shY + pos[ii3+7];
152 iz3 = shZ + pos[ii3+8];
153 ix4 = shX + pos[ii3+9];
154 iy4 = shY + pos[ii3+10];
155 iz4 = shZ + pos[ii3+11];
/* Inner loop: k runs over [nj0, nj1). */
172 for(k=nj0; (k<nj1); k++)
/* The product of the two weights selects how this pair is handled. */
175 weight_cg2 = wf[jnr];
176 weight_product = weight_cg1*weight_cg2;
177 if (weight_product < ALMOST_ZERO) {
180 else if (weight_product >= ALMOST_ONE)
182 /* force is zero, skip this molecule */
/* Scaling factor: complement of the weight product. */
187 hybscal = 1.0 - weight_product;
/* Squared distances between each i-site and the j-atom. */
196 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
200 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
204 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
208 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
/* Inverse distances. */
209 rinv11 = 1.0/sqrt(rsq11);
210 rinv21 = 1.0/sqrt(rsq21);
211 rinv31 = 1.0/sqrt(rsq31);
212 rinv41 = 1.0/sqrt(rsq41);
/* Buckingham term on the site-1 distance: tj selects the parameter triple in vdwparam. */
213 tj = nti+3*type[jnr];
215 cexp1 = vdwparam[tj+1];
216 cexp2 = vdwparam[tj+2];
217 rinvsq = rinv11*rinv11;
218 rinvsix = rinvsq*rinvsq*rinvsq;
/* br = cexp2*r, since rsq11*rinv11 is the distance; the exponential term is cexp1*exp(-br). */
220 br = cexp2*rsq11*rinv11;
221 Vvdwexp = cexp1*exp(-br);
/* Vvdw6 is assigned on a line that is not visible here. */
222 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
223 fscal = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
/* Load the j-atom force and subtract tx,ty,tz (computed on lines not visible here). */
231 fjx1 = faction[j3+0] - tx;
232 fjy1 = faction[j3+1] - ty;
233 fjz1 = faction[j3+2] - tz;
/* Table evaluation for site 2: FF combines Fp with the VFtab[nnn+2] and VFtab[nnn+3] terms. */
244 Geps = eps*VFtab[nnn+2];
245 Heps2 = eps2*VFtab[nnn+3];
248 FF = Fp+Geps+2.0*Heps2;
251 vctot = vctot + vcoul;
252 fscal = -((fijC)*tabscale)*rinv21;
/* Same table evaluation for site 3. */
271 Geps = eps*VFtab[nnn+2];
272 Heps2 = eps2*VFtab[nnn+3];
275 FF = Fp+Geps+2.0*Heps2;
278 vctot = vctot + vcoul;
279 fscal = -((fijC)*tabscale)*rinv31;
/* Same table evaluation for site 4. */
299 Geps = eps*VFtab[nnn+2];
300 Heps2 = eps2*VFtab[nnn+3];
303 FF = Fp+Geps+2.0*Heps2;
306 vctot = vctot + vcoul;
307 fscal = -((fijC)*tabscale)*rinv41;
/* Store the updated j-atom force. */
315 faction[j3+0] = fjx1 - tx;
316 faction[j3+1] = fjy1 - ty;
317 faction[j3+2] = fjz1 - tz;
/* Add the accumulated i-site forces to faction[ii3+0..11]. */
320 faction[ii3+0] = faction[ii3+0] + fix1;
321 faction[ii3+1] = faction[ii3+1] + fiy1;
322 faction[ii3+2] = faction[ii3+2] + fiz1;
323 faction[ii3+3] = faction[ii3+3] + fix2;
324 faction[ii3+4] = faction[ii3+4] + fiy2;
325 faction[ii3+5] = faction[ii3+5] + fiz2;
326 faction[ii3+6] = faction[ii3+6] + fix3;
327 faction[ii3+7] = faction[ii3+7] + fiy3;
328 faction[ii3+8] = faction[ii3+8] + fiz3;
329 faction[ii3+9] = faction[ii3+9] + fix4;
330 faction[ii3+10] = faction[ii3+10] + fiy4;
331 faction[ii3+11] = faction[ii3+11] + fiz4;
/* The per-component sum over the four sites goes into fshift[is3..is3+2]. */
332 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
333 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
334 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* Add vctot and Vvdwtot to Vc[ggid] and Vvdw[ggid]. */
336 Vc[ggid] = Vc[ggid] + vctot;
337 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* ninner grows by the inner-loop trip count nj1-nj0; nouter by nn1-nn0. */
338 ninner = ninner + nj1 - nj0;
341 nouter = nouter + nn1 - nn0;
354 * Gromacs nonbonded kernel nb_kernel323_adress_ex
355 * Coulomb interaction: Tabulated
356 * VdW interaction: Buckingham
357 * water optimization: TIP4P - other atoms
358 * Calculate forces: yes
/*
 * Review note: only a sampled subset of this definition is visible here
 * (the parameter list, the braces and a number of statements are not shown),
 * so the comments below describe the visible statements only.
 *
 * In this "_ex" variant the visible scaling factor is
 * hybscal = weight_product, the branch for weight_product < ALMOST_ZERO
 * carries the "force is zero, skip" remark, and each fscal is limited in
 * magnitude by force_cap when force_cap > 0.
 */
360 void nb_kernel323_adress_ex(
394 int nri,ntype,nthreads;
395 real facel,krf,crf,tabscale,gbtabscale;
396 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
397 int nn0,nn1,nouter,ninner;
/* Geps, Heps2 and Fp feed the table term FF further down. */
409 real Y,F,Geps,Heps2,Fp,VV;
/* i*N: shifted coordinates of i-site N; fi*N: force accumulated on that site. */
413 real ix1,iy1,iz1,fix1,fiy1,fiz1;
414 real ix2,iy2,iz2,fix2,fiy2,fiz2;
415 real ix3,iy3,iz3,fix3,fiy3,fiz3;
416 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* fjx1/fjy1/fjz1 hold the j-atom force taken from faction[j3+0..2] and written back there. */
417 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
/* dxN1/dyN1/dzN1 components, their squared norm rsqN1, and rinvN1 = 1/sqrt(rsqN1). */
418 real dx11,dy11,dz11,rsq11,rinv11;
419 real dx21,dy21,dz21,rsq21,rinv21;
420 real dx31,dy31,dz31,rsq31,rinv31;
421 real dx41,dy41,dz41,rsq41,rinv41;
/* weight_cg2 is read from wf[jnr]; weight_product = weight_cg1*weight_cg2. */
424 real weight_cg1, weight_cg2, weight_product;
/* Scalar arguments arrive by pointer and are copied into locals. */
429 nthreads = *p_nthreads;
433 tabscale = *p_tabscale;
/* Charges at offsets ii+1 and ii+3 are pre-multiplied by facel. */
435 qH = facel*charge[ii+1];
436 qM = facel*charge[ii+3];
/* Base offset into vdwparam for the i-type; the factor 3 matches the stride-3 indexing through tj below. */
437 nti = 3*ntype*type[ii];
/* With GMX_THREAD_SHM_FDECOMP defined, mtx is locked around the computation of the outer-loop bound nn1. */
444 #ifdef GMX_THREAD_SHM_FDECOMP
445 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* nn1 lies (nri-nn0)/(2*nthreads)+10 entries past nn0. */
447 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
449 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop: n runs over [nn0, nn1). */
456 for(n=nn0; (n<nn1); n++)
460 shY = shiftvec[is3+1];
461 shZ = shiftvec[is3+2];
/* Shifted positions of the four i-sites: pos[ii3+0..11] plus (shX,shY,shZ). */
466 ix1 = shX + pos[ii3+0];
467 iy1 = shY + pos[ii3+1];
468 iz1 = shZ + pos[ii3+2];
469 ix2 = shX + pos[ii3+3];
470 iy2 = shY + pos[ii3+4];
471 iz2 = shZ + pos[ii3+5];
472 ix3 = shX + pos[ii3+6];
473 iy3 = shY + pos[ii3+7];
474 iz3 = shZ + pos[ii3+8];
475 ix4 = shX + pos[ii3+9];
476 iy4 = shY + pos[ii3+10];
477 iz4 = shZ + pos[ii3+11];
/* Inner loop: k runs over [nj0, nj1). */
494 for(k=nj0; (k<nj1); k++)
/* The product of the two weights selects how this pair is handled. */
497 weight_cg2 = wf[jnr];
498 weight_product = weight_cg1*weight_cg2;
499 if (weight_product < ALMOST_ZERO) {
500 /* force is zero, skip this molecule */
503 else if (weight_product >= ALMOST_ONE)
/* Scaling factor: the weight product itself. */
509 hybscal = weight_product;
/* Squared distances between each i-site and the j-atom. */
518 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
522 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
526 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
530 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
/* Inverse distances. */
531 rinv11 = 1.0/sqrt(rsq11);
532 rinv21 = 1.0/sqrt(rsq21);
533 rinv31 = 1.0/sqrt(rsq31);
534 rinv41 = 1.0/sqrt(rsq41);
/* Buckingham term on the site-1 distance: tj selects the parameter triple in vdwparam. */
535 tj = nti+3*type[jnr];
537 cexp1 = vdwparam[tj+1];
538 cexp2 = vdwparam[tj+2];
539 rinvsq = rinv11*rinv11;
540 rinvsix = rinvsq*rinvsq*rinvsq;
/* br = cexp2*r, since rsq11*rinv11 is the distance; the exponential term is cexp1*exp(-br). */
542 br = cexp2*rsq11*rinv11;
543 Vvdwexp = cexp1*exp(-br);
/* Vvdw6 is assigned on a line that is not visible here. */
544 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
545 fscal = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
/* Clamp: when force_cap > 0 and |fscal| exceeds it, fscal is rescaled to magnitude force_cap with its sign kept. */
547 if(force_cap>0 && (fabs(fscal)> force_cap)){
548 fscal=force_cap*fscal/fabs(fscal);
/* Load the j-atom force and subtract tx,ty,tz (computed on lines not visible here). */
556 fjx1 = faction[j3+0] - tx;
557 fjy1 = faction[j3+1] - ty;
558 fjz1 = faction[j3+2] - tz;
/* Table evaluation for site 2: FF combines Fp with the VFtab[nnn+2] and VFtab[nnn+3] terms. */
569 Geps = eps*VFtab[nnn+2];
570 Heps2 = eps2*VFtab[nnn+3];
573 FF = Fp+Geps+2.0*Heps2;
576 vctot = vctot + vcoul;
577 fscal = -((fijC)*tabscale)*rinv21;
/* Same force_cap clamp as above, applied to the site-2 fscal. */
579 if(force_cap>0 && (fabs(fscal)> force_cap)){
580 fscal=force_cap*fscal/fabs(fscal);
/* Same table evaluation for site 3. */
599 Geps = eps*VFtab[nnn+2];
600 Heps2 = eps2*VFtab[nnn+3];
603 FF = Fp+Geps+2.0*Heps2;
606 vctot = vctot + vcoul;
607 fscal = -((fijC)*tabscale)*rinv31;
/* Same force_cap clamp, applied to the site-3 fscal. */
609 if(force_cap>0 && (fabs(fscal)> force_cap)){
610 fscal=force_cap*fscal/fabs(fscal);
/* Same table evaluation for site 4. */
630 Geps = eps*VFtab[nnn+2];
631 Heps2 = eps2*VFtab[nnn+3];
634 FF = Fp+Geps+2.0*Heps2;
637 vctot = vctot + vcoul;
638 fscal = -((fijC)*tabscale)*rinv41;
/* Same force_cap clamp, applied to the site-4 fscal. */
640 if(force_cap>0 && (fabs(fscal)> force_cap)){
641 fscal=force_cap*fscal/fabs(fscal);
/* Store the updated j-atom force. */
649 faction[j3+0] = fjx1 - tx;
650 faction[j3+1] = fjy1 - ty;
651 faction[j3+2] = fjz1 - tz;
/* Add the accumulated i-site forces to faction[ii3+0..11]. */
654 faction[ii3+0] = faction[ii3+0] + fix1;
655 faction[ii3+1] = faction[ii3+1] + fiy1;
656 faction[ii3+2] = faction[ii3+2] + fiz1;
657 faction[ii3+3] = faction[ii3+3] + fix2;
658 faction[ii3+4] = faction[ii3+4] + fiy2;
659 faction[ii3+5] = faction[ii3+5] + fiz2;
660 faction[ii3+6] = faction[ii3+6] + fix3;
661 faction[ii3+7] = faction[ii3+7] + fiy3;
662 faction[ii3+8] = faction[ii3+8] + fiz3;
663 faction[ii3+9] = faction[ii3+9] + fix4;
664 faction[ii3+10] = faction[ii3+10] + fiy4;
665 faction[ii3+11] = faction[ii3+11] + fiz4;
/* The per-component sum over the four sites goes into fshift[is3..is3+2]. */
666 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
667 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
668 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* Add vctot and Vvdwtot to Vc[ggid] and Vvdw[ggid]. */
670 Vc[ggid] = Vc[ggid] + vctot;
671 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* ninner grows by the inner-loop trip count nj1-nj0; nouter by nn1-nn0. */
672 ninner = ninner + nj1 - nj0;
675 nouter = nouter + nn1 - nn0;