2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Thresholds that both kernels in this file compare against weight_product
 * (weight_cg1*weight_cg2): "< ALMOST_ZERO" and ">= ALMOST_ONE".
 * NOTE(review): ALMOST_ONE expands without enclosing parentheses, so it is
 * only safe in contexts where operator precedence cannot split it.
 * NOTE(review): 1e-30 is far below double-precision resolution near 1, so
 * 1-(1e-30) presumably evaluates to exactly 1.0 - confirm this is intended. */
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel334_adress.h"
32 * Gromacs nonbonded kernel nb_kernel334_adress_cg
33 * Coulomb interaction: Tabulated
34 * VdW interaction: Tabulated
35 * water optimization: pairs of TIP4P interactions
36 * Calculate forces: yes
38 void nb_kernel334_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
84 real Y,F,Geps,Heps2,Fp,VV;
88 real ix1,iy1,iz1,fix1,fiy1,fiz1;
89 real ix2,iy2,iz2,fix2,fiy2,fiz2;
90 real ix3,iy3,iz3,fix3,fiy3,fiz3;
91 real ix4,iy4,iz4,fix4,fiy4,fiz4;
93 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
94 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
95 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
96 real dx11,dy11,dz11,rsq11,rinv11;
97 real dx22,dy22,dz22,rsq22,rinv22;
98 real dx23,dy23,dz23,rsq23,rinv23;
99 real dx24,dy24,dz24,rsq24,rinv24;
100 real dx32,dy32,dz32,rsq32,rinv32;
101 real dx33,dy33,dz33,rsq33,rinv33;
102 real dx34,dy34,dz34,rsq34,rinv34;
103 real dx42,dy42,dz42,rsq42,rinv42;
104 real dx43,dy43,dz43,rsq43,rinv43;
105 real dx44,dy44,dz44,rsq44,rinv44;
106 real qH,qM,qqMM,qqMH,qqHH;
108 real weight_cg1, weight_cg2, weight_product;
113 nthreads = *p_nthreads;
117 tabscale = *p_tabscale;
124 tj = 2*(ntype+1)*type[ii];
126 c12 = vdwparam[tj+1];
133 #ifdef GMX_THREAD_SHM_FDECOMP
134 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
136 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
138 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
145 for(n=nn0; (n<nn1); n++)
149 shY = shiftvec[is3+1];
150 shZ = shiftvec[is3+2];
155 ix1 = shX + pos[ii3+0];
156 iy1 = shY + pos[ii3+1];
157 iz1 = shZ + pos[ii3+2];
158 ix2 = shX + pos[ii3+3];
159 iy2 = shY + pos[ii3+4];
160 iz2 = shZ + pos[ii3+5];
161 ix3 = shX + pos[ii3+6];
162 iy3 = shY + pos[ii3+7];
163 iz3 = shZ + pos[ii3+8];
164 ix4 = shX + pos[ii3+9];
165 iy4 = shY + pos[ii3+10];
166 iz4 = shZ + pos[ii3+11];
183 for(k=nj0; (k<nj1); k++)
186 weight_cg2 = wf[jnr];
187 weight_product = weight_cg1*weight_cg2;
188 if (weight_product < ALMOST_ZERO) {
191 else if (weight_product >= ALMOST_ONE)
193 /* force is zero, skip this molecule */
198 hybscal = 1.0 - weight_product;
216 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
220 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
224 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
228 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
232 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
236 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
240 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
244 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
248 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
252 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
253 rinv11 = 1.0/sqrt(rsq11);
254 rinv22 = 1.0/sqrt(rsq22);
255 rinv23 = 1.0/sqrt(rsq23);
256 rinv24 = 1.0/sqrt(rsq24);
257 rinv32 = 1.0/sqrt(rsq32);
258 rinv33 = 1.0/sqrt(rsq33);
259 rinv34 = 1.0/sqrt(rsq34);
260 rinv42 = 1.0/sqrt(rsq42);
261 rinv43 = 1.0/sqrt(rsq43);
262 rinv44 = 1.0/sqrt(rsq44);
272 Geps = eps*VFtab[nnn+2];
273 Heps2 = eps2*VFtab[nnn+3];
276 FF = Fp+Geps+2.0*Heps2;
282 Geps = eps*VFtab[nnn+2];
283 Heps2 = eps2*VFtab[nnn+3];
286 FF = Fp+Geps+2.0*Heps2;
289 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
290 fscal = -((fijD+fijR)*tabscale)*rinv11;
298 faction[j3+0] = faction[j3+0] - tx;
299 faction[j3+1] = faction[j3+1] - ty;
300 faction[j3+2] = faction[j3+2] - tz;
310 Geps = eps*VFtab[nnn+2];
311 Heps2 = eps2*VFtab[nnn+3];
314 FF = Fp+Geps+2.0*Heps2;
317 vctot = vctot + vcoul;
318 fscal = -((fijC)*tabscale)*rinv22;
326 fjx2 = faction[j3+3] - tx;
327 fjy2 = faction[j3+4] - ty;
328 fjz2 = faction[j3+5] - tz;
338 Geps = eps*VFtab[nnn+2];
339 Heps2 = eps2*VFtab[nnn+3];
342 FF = Fp+Geps+2.0*Heps2;
345 vctot = vctot + vcoul;
346 fscal = -((fijC)*tabscale)*rinv23;
354 fjx3 = faction[j3+6] - tx;
355 fjy3 = faction[j3+7] - ty;
356 fjz3 = faction[j3+8] - tz;
366 Geps = eps*VFtab[nnn+2];
367 Heps2 = eps2*VFtab[nnn+3];
370 FF = Fp+Geps+2.0*Heps2;
373 vctot = vctot + vcoul;
374 fscal = -((fijC)*tabscale)*rinv24;
382 fjx4 = faction[j3+9] - tx;
383 fjy4 = faction[j3+10] - ty;
384 fjz4 = faction[j3+11] - tz;
394 Geps = eps*VFtab[nnn+2];
395 Heps2 = eps2*VFtab[nnn+3];
398 FF = Fp+Geps+2.0*Heps2;
401 vctot = vctot + vcoul;
402 fscal = -((fijC)*tabscale)*rinv32;
422 Geps = eps*VFtab[nnn+2];
423 Heps2 = eps2*VFtab[nnn+3];
426 FF = Fp+Geps+2.0*Heps2;
429 vctot = vctot + vcoul;
430 fscal = -((fijC)*tabscale)*rinv33;
450 Geps = eps*VFtab[nnn+2];
451 Heps2 = eps2*VFtab[nnn+3];
454 FF = Fp+Geps+2.0*Heps2;
457 vctot = vctot + vcoul;
458 fscal = -((fijC)*tabscale)*rinv34;
478 Geps = eps*VFtab[nnn+2];
479 Heps2 = eps2*VFtab[nnn+3];
482 FF = Fp+Geps+2.0*Heps2;
485 vctot = vctot + vcoul;
486 fscal = -((fijC)*tabscale)*rinv42;
494 faction[j3+3] = fjx2 - tx;
495 faction[j3+4] = fjy2 - ty;
496 faction[j3+5] = fjz2 - tz;
506 Geps = eps*VFtab[nnn+2];
507 Heps2 = eps2*VFtab[nnn+3];
510 FF = Fp+Geps+2.0*Heps2;
513 vctot = vctot + vcoul;
514 fscal = -((fijC)*tabscale)*rinv43;
522 faction[j3+6] = fjx3 - tx;
523 faction[j3+7] = fjy3 - ty;
524 faction[j3+8] = fjz3 - tz;
534 Geps = eps*VFtab[nnn+2];
535 Heps2 = eps2*VFtab[nnn+3];
538 FF = Fp+Geps+2.0*Heps2;
541 vctot = vctot + vcoul;
542 fscal = -((fijC)*tabscale)*rinv44;
550 faction[j3+9] = fjx4 - tx;
551 faction[j3+10] = fjy4 - ty;
552 faction[j3+11] = fjz4 - tz;
555 faction[ii3+0] = faction[ii3+0] + fix1;
556 faction[ii3+1] = faction[ii3+1] + fiy1;
557 faction[ii3+2] = faction[ii3+2] + fiz1;
558 faction[ii3+3] = faction[ii3+3] + fix2;
559 faction[ii3+4] = faction[ii3+4] + fiy2;
560 faction[ii3+5] = faction[ii3+5] + fiz2;
561 faction[ii3+6] = faction[ii3+6] + fix3;
562 faction[ii3+7] = faction[ii3+7] + fiy3;
563 faction[ii3+8] = faction[ii3+8] + fiz3;
564 faction[ii3+9] = faction[ii3+9] + fix4;
565 faction[ii3+10] = faction[ii3+10] + fiy4;
566 faction[ii3+11] = faction[ii3+11] + fiz4;
567 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
568 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
569 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
571 Vc[ggid] = Vc[ggid] + vctot;
572 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
573 ninner = ninner + nj1 - nj0;
576 nouter = nouter + nn1 - nn0;
589 * Gromacs nonbonded kernel nb_kernel334_adress_ex
590 * Coulomb interaction: Tabulated
591 * VdW interaction: Tabulated
592 * water optimization: pairs of TIP4P interactions
593 * Calculate forces: yes
595 void nb_kernel334_adress_ex(
629 int nri,ntype,nthreads;
630 real facel,krf,crf,tabscale,gbtabscale;
631 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
632 int nn0,nn1,nouter,ninner;
641 real Y,F,Geps,Heps2,Fp,VV;
645 real ix1,iy1,iz1,fix1,fiy1,fiz1;
646 real ix2,iy2,iz2,fix2,fiy2,fiz2;
647 real ix3,iy3,iz3,fix3,fiy3,fiz3;
648 real ix4,iy4,iz4,fix4,fiy4,fiz4;
650 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
651 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
652 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
653 real dx11,dy11,dz11,rsq11,rinv11;
654 real dx22,dy22,dz22,rsq22,rinv22;
655 real dx23,dy23,dz23,rsq23,rinv23;
656 real dx24,dy24,dz24,rsq24,rinv24;
657 real dx32,dy32,dz32,rsq32,rinv32;
658 real dx33,dy33,dz33,rsq33,rinv33;
659 real dx34,dy34,dz34,rsq34,rinv34;
660 real dx42,dy42,dz42,rsq42,rinv42;
661 real dx43,dy43,dz43,rsq43,rinv43;
662 real dx44,dy44,dz44,rsq44,rinv44;
663 real qH,qM,qqMM,qqMH,qqHH;
665 real weight_cg1, weight_cg2, weight_product;
670 nthreads = *p_nthreads;
674 tabscale = *p_tabscale;
681 tj = 2*(ntype+1)*type[ii];
683 c12 = vdwparam[tj+1];
690 #ifdef GMX_THREAD_SHM_FDECOMP
691 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
693 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
695 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
702 for(n=nn0; (n<nn1); n++)
706 shY = shiftvec[is3+1];
707 shZ = shiftvec[is3+2];
712 ix1 = shX + pos[ii3+0];
713 iy1 = shY + pos[ii3+1];
714 iz1 = shZ + pos[ii3+2];
715 ix2 = shX + pos[ii3+3];
716 iy2 = shY + pos[ii3+4];
717 iz2 = shZ + pos[ii3+5];
718 ix3 = shX + pos[ii3+6];
719 iy3 = shY + pos[ii3+7];
720 iz3 = shZ + pos[ii3+8];
721 ix4 = shX + pos[ii3+9];
722 iy4 = shY + pos[ii3+10];
723 iz4 = shZ + pos[ii3+11];
740 for(k=nj0; (k<nj1); k++)
743 weight_cg2 = wf[jnr];
744 weight_product = weight_cg1*weight_cg2;
745 if (weight_product < ALMOST_ZERO) {
746 /* force is zero, skip this molecule */
749 else if (weight_product >= ALMOST_ONE)
755 hybscal = weight_product;
773 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
777 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
781 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
785 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
789 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
793 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
797 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
801 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
805 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
809 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
810 rinv11 = 1.0/sqrt(rsq11);
811 rinv22 = 1.0/sqrt(rsq22);
812 rinv23 = 1.0/sqrt(rsq23);
813 rinv24 = 1.0/sqrt(rsq24);
814 rinv32 = 1.0/sqrt(rsq32);
815 rinv33 = 1.0/sqrt(rsq33);
816 rinv34 = 1.0/sqrt(rsq34);
817 rinv42 = 1.0/sqrt(rsq42);
818 rinv43 = 1.0/sqrt(rsq43);
819 rinv44 = 1.0/sqrt(rsq44);
829 Geps = eps*VFtab[nnn+2];
830 Heps2 = eps2*VFtab[nnn+3];
833 FF = Fp+Geps+2.0*Heps2;
839 Geps = eps*VFtab[nnn+2];
840 Heps2 = eps2*VFtab[nnn+3];
843 FF = Fp+Geps+2.0*Heps2;
846 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
847 fscal = -((fijD+fijR)*tabscale)*rinv11;
849 if(force_cap>0 && (fabs(fscal)> force_cap)){
850 fscal=force_cap*fscal/fabs(fscal);
858 faction[j3+0] = faction[j3+0] - tx;
859 faction[j3+1] = faction[j3+1] - ty;
860 faction[j3+2] = faction[j3+2] - tz;
870 Geps = eps*VFtab[nnn+2];
871 Heps2 = eps2*VFtab[nnn+3];
874 FF = Fp+Geps+2.0*Heps2;
877 vctot = vctot + vcoul;
878 fscal = -((fijC)*tabscale)*rinv22;
880 if(force_cap>0 && (fabs(fscal)> force_cap)){
881 fscal=force_cap*fscal/fabs(fscal);
889 fjx2 = faction[j3+3] - tx;
890 fjy2 = faction[j3+4] - ty;
891 fjz2 = faction[j3+5] - tz;
901 Geps = eps*VFtab[nnn+2];
902 Heps2 = eps2*VFtab[nnn+3];
905 FF = Fp+Geps+2.0*Heps2;
908 vctot = vctot + vcoul;
909 fscal = -((fijC)*tabscale)*rinv23;
911 if(force_cap>0 && (fabs(fscal)> force_cap)){
912 fscal=force_cap*fscal/fabs(fscal);
920 fjx3 = faction[j3+6] - tx;
921 fjy3 = faction[j3+7] - ty;
922 fjz3 = faction[j3+8] - tz;
932 Geps = eps*VFtab[nnn+2];
933 Heps2 = eps2*VFtab[nnn+3];
936 FF = Fp+Geps+2.0*Heps2;
939 vctot = vctot + vcoul;
940 fscal = -((fijC)*tabscale)*rinv24;
942 if(force_cap>0 && (fabs(fscal)> force_cap)){
943 fscal=force_cap*fscal/fabs(fscal);
951 fjx4 = faction[j3+9] - tx;
952 fjy4 = faction[j3+10] - ty;
953 fjz4 = faction[j3+11] - tz;
963 Geps = eps*VFtab[nnn+2];
964 Heps2 = eps2*VFtab[nnn+3];
967 FF = Fp+Geps+2.0*Heps2;
970 vctot = vctot + vcoul;
971 fscal = -((fijC)*tabscale)*rinv32;
973 if(force_cap>0 && (fabs(fscal)> force_cap)){
974 fscal=force_cap*fscal/fabs(fscal);
994 Geps = eps*VFtab[nnn+2];
995 Heps2 = eps2*VFtab[nnn+3];
998 FF = Fp+Geps+2.0*Heps2;
1001 vctot = vctot + vcoul;
1002 fscal = -((fijC)*tabscale)*rinv33;
1004 if(force_cap>0 && (fabs(fscal)> force_cap)){
1005 fscal=force_cap*fscal/fabs(fscal);
1025 Geps = eps*VFtab[nnn+2];
1026 Heps2 = eps2*VFtab[nnn+3];
1029 FF = Fp+Geps+2.0*Heps2;
1032 vctot = vctot + vcoul;
1033 fscal = -((fijC)*tabscale)*rinv34;
1035 if(force_cap>0 && (fabs(fscal)> force_cap)){
1036 fscal=force_cap*fscal/fabs(fscal);
1056 Geps = eps*VFtab[nnn+2];
1057 Heps2 = eps2*VFtab[nnn+3];
1060 FF = Fp+Geps+2.0*Heps2;
1063 vctot = vctot + vcoul;
1064 fscal = -((fijC)*tabscale)*rinv42;
1066 if(force_cap>0 && (fabs(fscal)> force_cap)){
1067 fscal=force_cap*fscal/fabs(fscal);
1075 faction[j3+3] = fjx2 - tx;
1076 faction[j3+4] = fjy2 - ty;
1077 faction[j3+5] = fjz2 - tz;
1087 Geps = eps*VFtab[nnn+2];
1088 Heps2 = eps2*VFtab[nnn+3];
1091 FF = Fp+Geps+2.0*Heps2;
1094 vctot = vctot + vcoul;
1095 fscal = -((fijC)*tabscale)*rinv43;
1097 if(force_cap>0 && (fabs(fscal)> force_cap)){
1098 fscal=force_cap*fscal/fabs(fscal);
1106 faction[j3+6] = fjx3 - tx;
1107 faction[j3+7] = fjy3 - ty;
1108 faction[j3+8] = fjz3 - tz;
1118 Geps = eps*VFtab[nnn+2];
1119 Heps2 = eps2*VFtab[nnn+3];
1122 FF = Fp+Geps+2.0*Heps2;
1125 vctot = vctot + vcoul;
1126 fscal = -((fijC)*tabscale)*rinv44;
1128 if(force_cap>0 && (fabs(fscal)> force_cap)){
1129 fscal=force_cap*fscal/fabs(fscal);
1137 faction[j3+9] = fjx4 - tx;
1138 faction[j3+10] = fjy4 - ty;
1139 faction[j3+11] = fjz4 - tz;
1142 faction[ii3+0] = faction[ii3+0] + fix1;
1143 faction[ii3+1] = faction[ii3+1] + fiy1;
1144 faction[ii3+2] = faction[ii3+2] + fiz1;
1145 faction[ii3+3] = faction[ii3+3] + fix2;
1146 faction[ii3+4] = faction[ii3+4] + fiy2;
1147 faction[ii3+5] = faction[ii3+5] + fiz2;
1148 faction[ii3+6] = faction[ii3+6] + fix3;
1149 faction[ii3+7] = faction[ii3+7] + fiy3;
1150 faction[ii3+8] = faction[ii3+8] + fiz3;
1151 faction[ii3+9] = faction[ii3+9] + fix4;
1152 faction[ii3+10] = faction[ii3+10] + fiy4;
1153 faction[ii3+11] = faction[ii3+11] + fiz4;
1154 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
1155 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
1156 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
1158 Vc[ggid] = Vc[ggid] + vctot;
1159 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
1160 ninner = ninner + nj1 - nj0;
1163 nouter = nouter + nn1 - nn0;
1167 *outeriter = nouter;
1168 *inneriter = ninner;