2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/*
 * Thresholds compared against weight_product (= weight_cg1*weight_cg2)
 * in the kernels of this file.
 *
 * ALMOST_ONE is fully parenthesized so that it expands as one value in any
 * expression (for example 2*ALMOST_ONE or x/ALMOST_ONE), not only on the
 * right-hand side of a comparison.
 *
 * NOTE(review): 1e-30 is far smaller than the spacing of floating-point
 * values next to 1 (about 1e-16 for IEEE-754 double), so 1-(1e-30) rounds
 * to exactly 1.0 and ">= ALMOST_ONE" behaves like ">= 1.0" - confirm that
 * this is the intended threshold.
 */
#define ALMOST_ZERO 1e-30
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel314_adress.h"
32 * Gromacs nonbonded kernel nb_kernel314_adress_cg
33 * Coulomb interaction: Tabulated
34 * VdW interaction: Lennard-Jones
35 * water optimization: pairs of TIP4P interactions
36 * Calculate forces: yes
38 void nb_kernel314_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
86 real Y,F,Geps,Heps2,Fp,VV;
89 real ix1,iy1,iz1,fix1,fiy1,fiz1;
90 real ix2,iy2,iz2,fix2,fiy2,fiz2;
91 real ix3,iy3,iz3,fix3,fiy3,fiz3;
92 real ix4,iy4,iz4,fix4,fiy4,fiz4;
94 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
95 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
96 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
97 real dx11,dy11,dz11,rsq11;
98 real dx22,dy22,dz22,rsq22,rinv22;
99 real dx23,dy23,dz23,rsq23,rinv23;
100 real dx24,dy24,dz24,rsq24,rinv24;
101 real dx32,dy32,dz32,rsq32,rinv32;
102 real dx33,dy33,dz33,rsq33,rinv33;
103 real dx34,dy34,dz34,rsq34,rinv34;
104 real dx42,dy42,dz42,rsq42,rinv42;
105 real dx43,dy43,dz43,rsq43,rinv43;
106 real dx44,dy44,dz44,rsq44,rinv44;
107 real qH,qM,qqMM,qqMH,qqHH;
109 real weight_cg1, weight_cg2, weight_product;
114 nthreads = *p_nthreads;
118 tabscale = *p_tabscale;
125 tj = 2*(ntype+1)*type[ii];
127 c12 = vdwparam[tj+1];
134 #ifdef GMX_THREAD_SHM_FDECOMP
135 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
137 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
139 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
146 for(n=nn0; (n<nn1); n++)
150 shY = shiftvec[is3+1];
151 shZ = shiftvec[is3+2];
156 ix1 = shX + pos[ii3+0];
157 iy1 = shY + pos[ii3+1];
158 iz1 = shZ + pos[ii3+2];
159 ix2 = shX + pos[ii3+3];
160 iy2 = shY + pos[ii3+4];
161 iz2 = shZ + pos[ii3+5];
162 ix3 = shX + pos[ii3+6];
163 iy3 = shY + pos[ii3+7];
164 iz3 = shZ + pos[ii3+8];
165 ix4 = shX + pos[ii3+9];
166 iy4 = shY + pos[ii3+10];
167 iz4 = shZ + pos[ii3+11];
184 for(k=nj0; (k<nj1); k++)
187 weight_cg2 = wf[jnr];
188 weight_product = weight_cg1*weight_cg2;
189 if (weight_product < ALMOST_ZERO) {
192 else if (weight_product >= ALMOST_ONE)
194 /* force is zero, skip this molecule */
199 hybscal = 1.0 - weight_product;
217 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
221 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
225 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
229 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
233 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
237 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
241 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
245 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
249 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
253 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
255 rinv22 = 1.0/sqrt(rsq22);
256 rinv23 = 1.0/sqrt(rsq23);
257 rinv24 = 1.0/sqrt(rsq24);
258 rinv32 = 1.0/sqrt(rsq32);
259 rinv33 = 1.0/sqrt(rsq33);
260 rinv34 = 1.0/sqrt(rsq34);
261 rinv42 = 1.0/sqrt(rsq42);
262 rinv43 = 1.0/sqrt(rsq43);
263 rinv44 = 1.0/sqrt(rsq44);
264 rinvsix = rinvsq*rinvsq*rinvsq;
266 Vvdw12 = c12*rinvsix*rinvsix;
267 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
268 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
276 faction[j3+0] = faction[j3+0] - tx;
277 faction[j3+1] = faction[j3+1] - ty;
278 faction[j3+2] = faction[j3+2] - tz;
288 Geps = eps*VFtab[nnn+2];
289 Heps2 = eps2*VFtab[nnn+3];
292 FF = Fp+Geps+2.0*Heps2;
295 vctot = vctot + vcoul;
296 fscal = -((fijC)*tabscale)*rinv22;
304 fjx2 = faction[j3+3] - tx;
305 fjy2 = faction[j3+4] - ty;
306 fjz2 = faction[j3+5] - tz;
316 Geps = eps*VFtab[nnn+2];
317 Heps2 = eps2*VFtab[nnn+3];
320 FF = Fp+Geps+2.0*Heps2;
323 vctot = vctot + vcoul;
324 fscal = -((fijC)*tabscale)*rinv23;
332 fjx3 = faction[j3+6] - tx;
333 fjy3 = faction[j3+7] - ty;
334 fjz3 = faction[j3+8] - tz;
344 Geps = eps*VFtab[nnn+2];
345 Heps2 = eps2*VFtab[nnn+3];
348 FF = Fp+Geps+2.0*Heps2;
351 vctot = vctot + vcoul;
352 fscal = -((fijC)*tabscale)*rinv24;
360 fjx4 = faction[j3+9] - tx;
361 fjy4 = faction[j3+10] - ty;
362 fjz4 = faction[j3+11] - tz;
372 Geps = eps*VFtab[nnn+2];
373 Heps2 = eps2*VFtab[nnn+3];
376 FF = Fp+Geps+2.0*Heps2;
379 vctot = vctot + vcoul;
380 fscal = -((fijC)*tabscale)*rinv32;
400 Geps = eps*VFtab[nnn+2];
401 Heps2 = eps2*VFtab[nnn+3];
404 FF = Fp+Geps+2.0*Heps2;
407 vctot = vctot + vcoul;
408 fscal = -((fijC)*tabscale)*rinv33;
428 Geps = eps*VFtab[nnn+2];
429 Heps2 = eps2*VFtab[nnn+3];
432 FF = Fp+Geps+2.0*Heps2;
435 vctot = vctot + vcoul;
436 fscal = -((fijC)*tabscale)*rinv34;
456 Geps = eps*VFtab[nnn+2];
457 Heps2 = eps2*VFtab[nnn+3];
460 FF = Fp+Geps+2.0*Heps2;
463 vctot = vctot + vcoul;
464 fscal = -((fijC)*tabscale)*rinv42;
472 faction[j3+3] = fjx2 - tx;
473 faction[j3+4] = fjy2 - ty;
474 faction[j3+5] = fjz2 - tz;
484 Geps = eps*VFtab[nnn+2];
485 Heps2 = eps2*VFtab[nnn+3];
488 FF = Fp+Geps+2.0*Heps2;
491 vctot = vctot + vcoul;
492 fscal = -((fijC)*tabscale)*rinv43;
500 faction[j3+6] = fjx3 - tx;
501 faction[j3+7] = fjy3 - ty;
502 faction[j3+8] = fjz3 - tz;
512 Geps = eps*VFtab[nnn+2];
513 Heps2 = eps2*VFtab[nnn+3];
516 FF = Fp+Geps+2.0*Heps2;
519 vctot = vctot + vcoul;
520 fscal = -((fijC)*tabscale)*rinv44;
528 faction[j3+9] = fjx4 - tx;
529 faction[j3+10] = fjy4 - ty;
530 faction[j3+11] = fjz4 - tz;
533 faction[ii3+0] = faction[ii3+0] + fix1;
534 faction[ii3+1] = faction[ii3+1] + fiy1;
535 faction[ii3+2] = faction[ii3+2] + fiz1;
536 faction[ii3+3] = faction[ii3+3] + fix2;
537 faction[ii3+4] = faction[ii3+4] + fiy2;
538 faction[ii3+5] = faction[ii3+5] + fiz2;
539 faction[ii3+6] = faction[ii3+6] + fix3;
540 faction[ii3+7] = faction[ii3+7] + fiy3;
541 faction[ii3+8] = faction[ii3+8] + fiz3;
542 faction[ii3+9] = faction[ii3+9] + fix4;
543 faction[ii3+10] = faction[ii3+10] + fiy4;
544 faction[ii3+11] = faction[ii3+11] + fiz4;
545 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
546 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
547 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
549 Vc[ggid] = Vc[ggid] + vctot;
550 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
551 ninner = ninner + nj1 - nj0;
554 nouter = nouter + nn1 - nn0;
567 * Gromacs nonbonded kernel nb_kernel314_adress_ex
568 * Coulomb interaction: Tabulated
569 * VdW interaction: Lennard-Jones
570 * water optimization: pairs of TIP4P interactions
571 * Calculate forces: yes
573 void nb_kernel314_adress_ex(
607 int nri,ntype,nthreads;
608 real facel,krf,crf,tabscale,gbtabscale;
609 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
610 int nn0,nn1,nouter,ninner;
621 real Y,F,Geps,Heps2,Fp,VV;
624 real ix1,iy1,iz1,fix1,fiy1,fiz1;
625 real ix2,iy2,iz2,fix2,fiy2,fiz2;
626 real ix3,iy3,iz3,fix3,fiy3,fiz3;
627 real ix4,iy4,iz4,fix4,fiy4,fiz4;
629 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
630 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
631 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
632 real dx11,dy11,dz11,rsq11;
633 real dx22,dy22,dz22,rsq22,rinv22;
634 real dx23,dy23,dz23,rsq23,rinv23;
635 real dx24,dy24,dz24,rsq24,rinv24;
636 real dx32,dy32,dz32,rsq32,rinv32;
637 real dx33,dy33,dz33,rsq33,rinv33;
638 real dx34,dy34,dz34,rsq34,rinv34;
639 real dx42,dy42,dz42,rsq42,rinv42;
640 real dx43,dy43,dz43,rsq43,rinv43;
641 real dx44,dy44,dz44,rsq44,rinv44;
642 real qH,qM,qqMM,qqMH,qqHH;
644 real weight_cg1, weight_cg2, weight_product;
649 nthreads = *p_nthreads;
653 tabscale = *p_tabscale;
660 tj = 2*(ntype+1)*type[ii];
662 c12 = vdwparam[tj+1];
669 #ifdef GMX_THREAD_SHM_FDECOMP
670 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
672 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
674 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
681 for(n=nn0; (n<nn1); n++)
685 shY = shiftvec[is3+1];
686 shZ = shiftvec[is3+2];
691 ix1 = shX + pos[ii3+0];
692 iy1 = shY + pos[ii3+1];
693 iz1 = shZ + pos[ii3+2];
694 ix2 = shX + pos[ii3+3];
695 iy2 = shY + pos[ii3+4];
696 iz2 = shZ + pos[ii3+5];
697 ix3 = shX + pos[ii3+6];
698 iy3 = shY + pos[ii3+7];
699 iz3 = shZ + pos[ii3+8];
700 ix4 = shX + pos[ii3+9];
701 iy4 = shY + pos[ii3+10];
702 iz4 = shZ + pos[ii3+11];
719 for(k=nj0; (k<nj1); k++)
722 weight_cg2 = wf[jnr];
723 weight_product = weight_cg1*weight_cg2;
724 if (weight_product < ALMOST_ZERO) {
725 /* force is zero, skip this molecule */
728 else if (weight_product >= ALMOST_ONE)
734 hybscal = weight_product;
752 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
756 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
760 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
764 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
768 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
772 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
776 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
780 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
784 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
788 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
790 rinv22 = 1.0/sqrt(rsq22);
791 rinv23 = 1.0/sqrt(rsq23);
792 rinv24 = 1.0/sqrt(rsq24);
793 rinv32 = 1.0/sqrt(rsq32);
794 rinv33 = 1.0/sqrt(rsq33);
795 rinv34 = 1.0/sqrt(rsq34);
796 rinv42 = 1.0/sqrt(rsq42);
797 rinv43 = 1.0/sqrt(rsq43);
798 rinv44 = 1.0/sqrt(rsq44);
799 rinvsix = rinvsq*rinvsq*rinvsq;
801 Vvdw12 = c12*rinvsix*rinvsix;
802 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
803 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
805 if(force_cap>0 && (fabs(fscal)> force_cap)){
806 fscal=force_cap*fscal/fabs(fscal);
814 faction[j3+0] = faction[j3+0] - tx;
815 faction[j3+1] = faction[j3+1] - ty;
816 faction[j3+2] = faction[j3+2] - tz;
826 Geps = eps*VFtab[nnn+2];
827 Heps2 = eps2*VFtab[nnn+3];
830 FF = Fp+Geps+2.0*Heps2;
833 vctot = vctot + vcoul;
834 fscal = -((fijC)*tabscale)*rinv22;
836 if(force_cap>0 && (fabs(fscal)> force_cap)){
837 fscal=force_cap*fscal/fabs(fscal);
845 fjx2 = faction[j3+3] - tx;
846 fjy2 = faction[j3+4] - ty;
847 fjz2 = faction[j3+5] - tz;
857 Geps = eps*VFtab[nnn+2];
858 Heps2 = eps2*VFtab[nnn+3];
861 FF = Fp+Geps+2.0*Heps2;
864 vctot = vctot + vcoul;
865 fscal = -((fijC)*tabscale)*rinv23;
867 if(force_cap>0 && (fabs(fscal)> force_cap)){
868 fscal=force_cap*fscal/fabs(fscal);
876 fjx3 = faction[j3+6] - tx;
877 fjy3 = faction[j3+7] - ty;
878 fjz3 = faction[j3+8] - tz;
888 Geps = eps*VFtab[nnn+2];
889 Heps2 = eps2*VFtab[nnn+3];
892 FF = Fp+Geps+2.0*Heps2;
895 vctot = vctot + vcoul;
896 fscal = -((fijC)*tabscale)*rinv24;
898 if(force_cap>0 && (fabs(fscal)> force_cap)){
899 fscal=force_cap*fscal/fabs(fscal);
907 fjx4 = faction[j3+9] - tx;
908 fjy4 = faction[j3+10] - ty;
909 fjz4 = faction[j3+11] - tz;
919 Geps = eps*VFtab[nnn+2];
920 Heps2 = eps2*VFtab[nnn+3];
923 FF = Fp+Geps+2.0*Heps2;
926 vctot = vctot + vcoul;
927 fscal = -((fijC)*tabscale)*rinv32;
929 if(force_cap>0 && (fabs(fscal)> force_cap)){
930 fscal=force_cap*fscal/fabs(fscal);
950 Geps = eps*VFtab[nnn+2];
951 Heps2 = eps2*VFtab[nnn+3];
954 FF = Fp+Geps+2.0*Heps2;
957 vctot = vctot + vcoul;
958 fscal = -((fijC)*tabscale)*rinv33;
960 if(force_cap>0 && (fabs(fscal)> force_cap)){
961 fscal=force_cap*fscal/fabs(fscal);
981 Geps = eps*VFtab[nnn+2];
982 Heps2 = eps2*VFtab[nnn+3];
985 FF = Fp+Geps+2.0*Heps2;
988 vctot = vctot + vcoul;
989 fscal = -((fijC)*tabscale)*rinv34;
991 if(force_cap>0 && (fabs(fscal)> force_cap)){
992 fscal=force_cap*fscal/fabs(fscal);
1012 Geps = eps*VFtab[nnn+2];
1013 Heps2 = eps2*VFtab[nnn+3];
1016 FF = Fp+Geps+2.0*Heps2;
1019 vctot = vctot + vcoul;
1020 fscal = -((fijC)*tabscale)*rinv42;
1022 if(force_cap>0 && (fabs(fscal)> force_cap)){
1023 fscal=force_cap*fscal/fabs(fscal);
1031 faction[j3+3] = fjx2 - tx;
1032 faction[j3+4] = fjy2 - ty;
1033 faction[j3+5] = fjz2 - tz;
1043 Geps = eps*VFtab[nnn+2];
1044 Heps2 = eps2*VFtab[nnn+3];
1047 FF = Fp+Geps+2.0*Heps2;
1050 vctot = vctot + vcoul;
1051 fscal = -((fijC)*tabscale)*rinv43;
1053 if(force_cap>0 && (fabs(fscal)> force_cap)){
1054 fscal=force_cap*fscal/fabs(fscal);
1062 faction[j3+6] = fjx3 - tx;
1063 faction[j3+7] = fjy3 - ty;
1064 faction[j3+8] = fjz3 - tz;
1074 Geps = eps*VFtab[nnn+2];
1075 Heps2 = eps2*VFtab[nnn+3];
1078 FF = Fp+Geps+2.0*Heps2;
1081 vctot = vctot + vcoul;
1082 fscal = -((fijC)*tabscale)*rinv44;
1084 if(force_cap>0 && (fabs(fscal)> force_cap)){
1085 fscal=force_cap*fscal/fabs(fscal);
1093 faction[j3+9] = fjx4 - tx;
1094 faction[j3+10] = fjy4 - ty;
1095 faction[j3+11] = fjz4 - tz;
1098 faction[ii3+0] = faction[ii3+0] + fix1;
1099 faction[ii3+1] = faction[ii3+1] + fiy1;
1100 faction[ii3+2] = faction[ii3+2] + fiz1;
1101 faction[ii3+3] = faction[ii3+3] + fix2;
1102 faction[ii3+4] = faction[ii3+4] + fiy2;
1103 faction[ii3+5] = faction[ii3+5] + fiz2;
1104 faction[ii3+6] = faction[ii3+6] + fix3;
1105 faction[ii3+7] = faction[ii3+7] + fiy3;
1106 faction[ii3+8] = faction[ii3+8] + fiz3;
1107 faction[ii3+9] = faction[ii3+9] + fix4;
1108 faction[ii3+10] = faction[ii3+10] + fiy4;
1109 faction[ii3+11] = faction[ii3+11] + fiz4;
1110 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
1111 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
1112 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
1114 Vc[ggid] = Vc[ggid] + vctot;
1115 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
1116 ninner = ninner + nj1 - nj0;
1119 nouter = nouter + nn1 - nn0;
1123 *outeriter = nouter;
1124 *inneriter = ninner;