2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
23 #define ALMOST_ZERO 1e-30
/* Upper threshold compared against weight_product in the kernels below
 * (weight_product >= ALMOST_ONE). The replacement list is parenthesized so
 * the macro always expands to a single operand, whatever operators surround
 * it at the point of use.
 * NOTE(review): 1e-30 is far below double-precision epsilon, so in IEEE-754
 * double arithmetic this expression evaluates to exactly 1.0; the value is
 * left as-is to preserve the existing comparisons.
 */
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel332_adress.h"
32 * Gromacs nonbonded kernel nb_kernel332_adress_cg
33 * Coulomb interaction: Tabulated
34 * VdW interaction: Tabulated
35 * water optimization: pairs of SPC/TIP3P interactions
36 * Calculate forces: yes
38 void nb_kernel332_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
84 real Y,F,Geps,Heps2,Fp,VV;
88 real ix1,iy1,iz1,fix1,fiy1,fiz1;
89 real ix2,iy2,iz2,fix2,fiy2,fiz2;
90 real ix3,iy3,iz3,fix3,fiy3,fiz3;
91 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
92 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
93 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
94 real dx11,dy11,dz11,rsq11,rinv11;
95 real dx12,dy12,dz12,rsq12,rinv12;
96 real dx13,dy13,dz13,rsq13,rinv13;
97 real dx21,dy21,dz21,rsq21,rinv21;
98 real dx22,dy22,dz22,rsq22,rinv22;
99 real dx23,dy23,dz23,rsq23,rinv23;
100 real dx31,dy31,dz31,rsq31,rinv31;
101 real dx32,dy32,dz32,rsq32,rinv32;
102 real dx33,dy33,dz33,rsq33,rinv33;
103 real qO,qH,qqOO,qqOH,qqHH;
105 real weight_cg1, weight_cg2, weight_product;
110 nthreads = *p_nthreads;
114 tabscale = *p_tabscale;
121 tj = 2*(ntype+1)*type[ii];
123 c12 = vdwparam[tj+1];
130 #ifdef GMX_THREAD_SHM_FDECOMP
131 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
133 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
135 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
142 for(n=nn0; (n<nn1); n++)
146 shY = shiftvec[is3+1];
147 shZ = shiftvec[is3+2];
152 ix1 = shX + pos[ii3+0];
153 iy1 = shY + pos[ii3+1];
154 iz1 = shZ + pos[ii3+2];
155 ix2 = shX + pos[ii3+3];
156 iy2 = shY + pos[ii3+4];
157 iz2 = shZ + pos[ii3+5];
158 ix3 = shX + pos[ii3+6];
159 iy3 = shY + pos[ii3+7];
160 iz3 = shZ + pos[ii3+8];
174 for(k=nj0; (k<nj1); k++)
177 weight_cg2 = wf[jnr];
178 weight_product = weight_cg1*weight_cg2;
179 if (weight_product < ALMOST_ZERO) {
182 else if (weight_product >= ALMOST_ONE)
184 /* force is zero, skip this molecule */
189 hybscal = 1.0 - weight_product;
204 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
208 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
212 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
216 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
220 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
224 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
228 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
232 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
236 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
237 rinv11 = 1.0/sqrt(rsq11);
238 rinv12 = 1.0/sqrt(rsq12);
239 rinv13 = 1.0/sqrt(rsq13);
240 rinv21 = 1.0/sqrt(rsq21);
241 rinv22 = 1.0/sqrt(rsq22);
242 rinv23 = 1.0/sqrt(rsq23);
243 rinv31 = 1.0/sqrt(rsq31);
244 rinv32 = 1.0/sqrt(rsq32);
245 rinv33 = 1.0/sqrt(rsq33);
255 Geps = eps*VFtab[nnn+2];
256 Heps2 = eps2*VFtab[nnn+3];
259 FF = Fp+Geps+2.0*Heps2;
262 vctot = vctot + vcoul;
266 Geps = eps*VFtab[nnn+2];
267 Heps2 = eps2*VFtab[nnn+3];
270 FF = Fp+Geps+2.0*Heps2;
276 Geps = eps*VFtab[nnn+2];
277 Heps2 = eps2*VFtab[nnn+3];
280 FF = Fp+Geps+2.0*Heps2;
283 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
284 fscal = -((fijC+fijD+fijR)*tabscale)*rinv11;
292 fjx1 = faction[j3+0] - tx;
293 fjy1 = faction[j3+1] - ty;
294 fjz1 = faction[j3+2] - tz;
304 Geps = eps*VFtab[nnn+2];
305 Heps2 = eps2*VFtab[nnn+3];
308 FF = Fp+Geps+2.0*Heps2;
311 vctot = vctot + vcoul;
312 fscal = -((fijC)*tabscale)*rinv12;
320 fjx2 = faction[j3+3] - tx;
321 fjy2 = faction[j3+4] - ty;
322 fjz2 = faction[j3+5] - tz;
332 Geps = eps*VFtab[nnn+2];
333 Heps2 = eps2*VFtab[nnn+3];
336 FF = Fp+Geps+2.0*Heps2;
339 vctot = vctot + vcoul;
340 fscal = -((fijC)*tabscale)*rinv13;
348 fjx3 = faction[j3+6] - tx;
349 fjy3 = faction[j3+7] - ty;
350 fjz3 = faction[j3+8] - tz;
360 Geps = eps*VFtab[nnn+2];
361 Heps2 = eps2*VFtab[nnn+3];
364 FF = Fp+Geps+2.0*Heps2;
367 vctot = vctot + vcoul;
368 fscal = -((fijC)*tabscale)*rinv21;
388 Geps = eps*VFtab[nnn+2];
389 Heps2 = eps2*VFtab[nnn+3];
392 FF = Fp+Geps+2.0*Heps2;
395 vctot = vctot + vcoul;
396 fscal = -((fijC)*tabscale)*rinv22;
416 Geps = eps*VFtab[nnn+2];
417 Heps2 = eps2*VFtab[nnn+3];
420 FF = Fp+Geps+2.0*Heps2;
423 vctot = vctot + vcoul;
424 fscal = -((fijC)*tabscale)*rinv23;
444 Geps = eps*VFtab[nnn+2];
445 Heps2 = eps2*VFtab[nnn+3];
448 FF = Fp+Geps+2.0*Heps2;
451 vctot = vctot + vcoul;
452 fscal = -((fijC)*tabscale)*rinv31;
460 faction[j3+0] = fjx1 - tx;
461 faction[j3+1] = fjy1 - ty;
462 faction[j3+2] = fjz1 - tz;
472 Geps = eps*VFtab[nnn+2];
473 Heps2 = eps2*VFtab[nnn+3];
476 FF = Fp+Geps+2.0*Heps2;
479 vctot = vctot + vcoul;
480 fscal = -((fijC)*tabscale)*rinv32;
488 faction[j3+3] = fjx2 - tx;
489 faction[j3+4] = fjy2 - ty;
490 faction[j3+5] = fjz2 - tz;
500 Geps = eps*VFtab[nnn+2];
501 Heps2 = eps2*VFtab[nnn+3];
504 FF = Fp+Geps+2.0*Heps2;
507 vctot = vctot + vcoul;
508 fscal = -((fijC)*tabscale)*rinv33;
516 faction[j3+6] = fjx3 - tx;
517 faction[j3+7] = fjy3 - ty;
518 faction[j3+8] = fjz3 - tz;
521 faction[ii3+0] = faction[ii3+0] + fix1;
522 faction[ii3+1] = faction[ii3+1] + fiy1;
523 faction[ii3+2] = faction[ii3+2] + fiz1;
524 faction[ii3+3] = faction[ii3+3] + fix2;
525 faction[ii3+4] = faction[ii3+4] + fiy2;
526 faction[ii3+5] = faction[ii3+5] + fiz2;
527 faction[ii3+6] = faction[ii3+6] + fix3;
528 faction[ii3+7] = faction[ii3+7] + fiy3;
529 faction[ii3+8] = faction[ii3+8] + fiz3;
530 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
531 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
532 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
534 Vc[ggid] = Vc[ggid] + vctot;
535 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
536 ninner = ninner + nj1 - nj0;
539 nouter = nouter + nn1 - nn0;
552 * Gromacs nonbonded kernel nb_kernel332_adress_ex
553 * Coulomb interaction: Tabulated
554 * VdW interaction: Tabulated
555 * water optimization: pairs of SPC/TIP3P interactions
556 * Calculate forces: yes
558 void nb_kernel332_adress_ex(
592 int nri,ntype,nthreads;
593 real facel,krf,crf,tabscale,gbtabscale;
594 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
595 int nn0,nn1,nouter,ninner;
604 real Y,F,Geps,Heps2,Fp,VV;
608 real ix1,iy1,iz1,fix1,fiy1,fiz1;
609 real ix2,iy2,iz2,fix2,fiy2,fiz2;
610 real ix3,iy3,iz3,fix3,fiy3,fiz3;
611 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
612 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
613 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
614 real dx11,dy11,dz11,rsq11,rinv11;
615 real dx12,dy12,dz12,rsq12,rinv12;
616 real dx13,dy13,dz13,rsq13,rinv13;
617 real dx21,dy21,dz21,rsq21,rinv21;
618 real dx22,dy22,dz22,rsq22,rinv22;
619 real dx23,dy23,dz23,rsq23,rinv23;
620 real dx31,dy31,dz31,rsq31,rinv31;
621 real dx32,dy32,dz32,rsq32,rinv32;
622 real dx33,dy33,dz33,rsq33,rinv33;
623 real qO,qH,qqOO,qqOH,qqHH;
625 real weight_cg1, weight_cg2, weight_product;
630 nthreads = *p_nthreads;
634 tabscale = *p_tabscale;
641 tj = 2*(ntype+1)*type[ii];
643 c12 = vdwparam[tj+1];
650 #ifdef GMX_THREAD_SHM_FDECOMP
651 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
653 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
655 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
662 for(n=nn0; (n<nn1); n++)
666 shY = shiftvec[is3+1];
667 shZ = shiftvec[is3+2];
672 ix1 = shX + pos[ii3+0];
673 iy1 = shY + pos[ii3+1];
674 iz1 = shZ + pos[ii3+2];
675 ix2 = shX + pos[ii3+3];
676 iy2 = shY + pos[ii3+4];
677 iz2 = shZ + pos[ii3+5];
678 ix3 = shX + pos[ii3+6];
679 iy3 = shY + pos[ii3+7];
680 iz3 = shZ + pos[ii3+8];
694 for(k=nj0; (k<nj1); k++)
697 weight_cg2 = wf[jnr];
698 weight_product = weight_cg1*weight_cg2;
699 if (weight_product < ALMOST_ZERO) {
700 /* force is zero, skip this molecule */
703 else if (weight_product >= ALMOST_ONE)
709 hybscal = weight_product;
724 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
728 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
732 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
736 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
740 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
744 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
748 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
752 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
756 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
757 rinv11 = 1.0/sqrt(rsq11);
758 rinv12 = 1.0/sqrt(rsq12);
759 rinv13 = 1.0/sqrt(rsq13);
760 rinv21 = 1.0/sqrt(rsq21);
761 rinv22 = 1.0/sqrt(rsq22);
762 rinv23 = 1.0/sqrt(rsq23);
763 rinv31 = 1.0/sqrt(rsq31);
764 rinv32 = 1.0/sqrt(rsq32);
765 rinv33 = 1.0/sqrt(rsq33);
775 Geps = eps*VFtab[nnn+2];
776 Heps2 = eps2*VFtab[nnn+3];
779 FF = Fp+Geps+2.0*Heps2;
782 vctot = vctot + vcoul;
786 Geps = eps*VFtab[nnn+2];
787 Heps2 = eps2*VFtab[nnn+3];
790 FF = Fp+Geps+2.0*Heps2;
796 Geps = eps*VFtab[nnn+2];
797 Heps2 = eps2*VFtab[nnn+3];
800 FF = Fp+Geps+2.0*Heps2;
803 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
804 fscal = -((fijC+fijD+fijR)*tabscale)*rinv11;
806 if(force_cap>0 && (fabs(fscal)> force_cap)){
807 fscal=force_cap*fscal/fabs(fscal);
815 fjx1 = faction[j3+0] - tx;
816 fjy1 = faction[j3+1] - ty;
817 fjz1 = faction[j3+2] - tz;
827 Geps = eps*VFtab[nnn+2];
828 Heps2 = eps2*VFtab[nnn+3];
831 FF = Fp+Geps+2.0*Heps2;
834 vctot = vctot + vcoul;
835 fscal = -((fijC)*tabscale)*rinv12;
837 if(force_cap>0 && (fabs(fscal)> force_cap)){
838 fscal=force_cap*fscal/fabs(fscal);
846 fjx2 = faction[j3+3] - tx;
847 fjy2 = faction[j3+4] - ty;
848 fjz2 = faction[j3+5] - tz;
858 Geps = eps*VFtab[nnn+2];
859 Heps2 = eps2*VFtab[nnn+3];
862 FF = Fp+Geps+2.0*Heps2;
865 vctot = vctot + vcoul;
866 fscal = -((fijC)*tabscale)*rinv13;
868 if(force_cap>0 && (fabs(fscal)> force_cap)){
869 fscal=force_cap*fscal/fabs(fscal);
877 fjx3 = faction[j3+6] - tx;
878 fjy3 = faction[j3+7] - ty;
879 fjz3 = faction[j3+8] - tz;
889 Geps = eps*VFtab[nnn+2];
890 Heps2 = eps2*VFtab[nnn+3];
893 FF = Fp+Geps+2.0*Heps2;
896 vctot = vctot + vcoul;
897 fscal = -((fijC)*tabscale)*rinv21;
899 if(force_cap>0 && (fabs(fscal)> force_cap)){
900 fscal=force_cap*fscal/fabs(fscal);
920 Geps = eps*VFtab[nnn+2];
921 Heps2 = eps2*VFtab[nnn+3];
924 FF = Fp+Geps+2.0*Heps2;
927 vctot = vctot + vcoul;
928 fscal = -((fijC)*tabscale)*rinv22;
930 if(force_cap>0 && (fabs(fscal)> force_cap)){
931 fscal=force_cap*fscal/fabs(fscal);
951 Geps = eps*VFtab[nnn+2];
952 Heps2 = eps2*VFtab[nnn+3];
955 FF = Fp+Geps+2.0*Heps2;
958 vctot = vctot + vcoul;
959 fscal = -((fijC)*tabscale)*rinv23;
961 if(force_cap>0 && (fabs(fscal)> force_cap)){
962 fscal=force_cap*fscal/fabs(fscal);
982 Geps = eps*VFtab[nnn+2];
983 Heps2 = eps2*VFtab[nnn+3];
986 FF = Fp+Geps+2.0*Heps2;
989 vctot = vctot + vcoul;
990 fscal = -((fijC)*tabscale)*rinv31;
992 if(force_cap>0 && (fabs(fscal)> force_cap)){
993 fscal=force_cap*fscal/fabs(fscal);
1001 faction[j3+0] = fjx1 - tx;
1002 faction[j3+1] = fjy1 - ty;
1003 faction[j3+2] = fjz1 - tz;
1013 Geps = eps*VFtab[nnn+2];
1014 Heps2 = eps2*VFtab[nnn+3];
1017 FF = Fp+Geps+2.0*Heps2;
1020 vctot = vctot + vcoul;
1021 fscal = -((fijC)*tabscale)*rinv32;
1023 if(force_cap>0 && (fabs(fscal)> force_cap)){
1024 fscal=force_cap*fscal/fabs(fscal);
1032 faction[j3+3] = fjx2 - tx;
1033 faction[j3+4] = fjy2 - ty;
1034 faction[j3+5] = fjz2 - tz;
1044 Geps = eps*VFtab[nnn+2];
1045 Heps2 = eps2*VFtab[nnn+3];
1048 FF = Fp+Geps+2.0*Heps2;
1051 vctot = vctot + vcoul;
1052 fscal = -((fijC)*tabscale)*rinv33;
1054 if(force_cap>0 && (fabs(fscal)> force_cap)){
1055 fscal=force_cap*fscal/fabs(fscal);
1063 faction[j3+6] = fjx3 - tx;
1064 faction[j3+7] = fjy3 - ty;
1065 faction[j3+8] = fjz3 - tz;
1068 faction[ii3+0] = faction[ii3+0] + fix1;
1069 faction[ii3+1] = faction[ii3+1] + fiy1;
1070 faction[ii3+2] = faction[ii3+2] + fiz1;
1071 faction[ii3+3] = faction[ii3+3] + fix2;
1072 faction[ii3+4] = faction[ii3+4] + fiy2;
1073 faction[ii3+5] = faction[ii3+5] + fiz2;
1074 faction[ii3+6] = faction[ii3+6] + fix3;
1075 faction[ii3+7] = faction[ii3+7] + fiy3;
1076 faction[ii3+8] = faction[ii3+8] + fiz3;
1077 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
1078 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
1079 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
1081 Vc[ggid] = Vc[ggid] + vctot;
1082 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
1083 ninner = ninner + nj1 - nj0;
1086 nouter = nouter + nn1 - nn0;
1090 *outeriter = nouter;
1091 *inneriter = ninner;