2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Thresholds compared against weight_product (the product of two weights) in the kernels below. */
23 #define ALMOST_ZERO 1e-30
/* NOTE(review): the expansion is not parenthesized, so it is only safe in
 * contexts such as `x >= ALMOST_ONE`. Also, with IEEE-754 double arithmetic
 * 1-(1e-30) rounds to exactly 1.0, so the test is effectively `>= 1.0` --
 * confirm this is intended. */
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel302_adress.h"
32 * Gromacs nonbonded kernel nb_kernel302_adress_cg
33 * Coulomb interaction: Tabulated
34 * VdW interaction: Not calculated
35 * water optimization: pairs of SPC/TIP3P interactions
36 * Calculate forces: yes
/*
 * nb_kernel302_adress_cg: per the generated header this kernel uses tabulated
 * Coulomb, no VdW, and handles pairs of three-site waters (3 x 3 site pairs),
 * computing forces.
 * NOTE(review): only part of this function is visible in this excerpt (the
 * parameter list and several statements are not shown); the comments below
 * describe the visible lines only.
 */
38 void nb_kernel302_adress_cg(
/* Scalar settings and loop bookkeeping. */
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Temporaries for the table lookup. */
81 real Y,F,Geps,Heps2,Fp,VV;
/* Coordinates and force accumulators for the three i-sites. */
84 real ix1,iy1,iz1,fix1,fiy1,fiz1;
85 real ix2,iy2,iz2,fix2,fiy2,fiz2;
86 real ix3,iy3,iz3,fix3,fiy3,fiz3;
/* Coordinates and force accumulators for the three j-sites. */
87 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
88 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
89 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* Distance vector, squared distance and inverse distance for each of the
 * nine i-site/j-site pairs (suffix = i-site index, j-site index). */
90 real dx11,dy11,dz11,rsq11,rinv11;
91 real dx12,dy12,dz12,rsq12,rinv12;
92 real dx13,dy13,dz13,rsq13,rinv13;
93 real dx21,dy21,dz21,rsq21,rinv21;
94 real dx22,dy22,dz22,rsq22,rinv22;
95 real dx23,dy23,dz23,rsq23,rinv23;
96 real dx31,dy31,dz31,rsq31,rinv31;
97 real dx32,dy32,dz32,rsq32,rinv32;
98 real dx33,dy33,dz33,rsq33,rinv33;
/* Charges and charge products (presumably O = oxygen, H = hydrogen -- confirm). */
99 real qO,qH,qqOO,qqOH,qqHH;
/* Weights of the two interacting groups and their product. */
100 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar inputs that are passed by pointer into locals. */
105 nthreads = *p_nthreads;
109 tabscale = *p_tabscale;
/* With GMX_THREAD_SHM_FDECOMP the upper bound nn1 of this thread's chunk is
 * computed between locking and unlocking mtx; the chunk size is
 * (nri-nn0)/(2*nthreads)+10 outer entries. */
122 #ifdef GMX_THREAD_SHM_FDECOMP
123 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
125 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
127 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries n in [nn0, nn1). */
134 for(n=nn0; (n<nn1); n++)
/* Shift vector components, read from shiftvec at offset is3. */
138 shY = shiftvec[is3+1];
139 shZ = shiftvec[is3+2];
/* Shifted coordinates of the three i-sites: nine consecutive entries of pos
 * starting at ii3. */
144 ix1 = shX + pos[ii3+0];
145 iy1 = shY + pos[ii3+1];
146 iz1 = shZ + pos[ii3+2];
147 ix2 = shX + pos[ii3+3];
148 iy2 = shY + pos[ii3+4];
149 iz2 = shZ + pos[ii3+5];
150 ix3 = shX + pos[ii3+6];
151 iy3 = shY + pos[ii3+7];
152 iz3 = shZ + pos[ii3+8];
/* Inner loop over entries k in [nj0, nj1). */
165 for(k=nj0; (k<nj1); k++)
/* Weight of the j group is wf[jnr]; weight_cg1 is set on a line not shown here. */
168 weight_cg2 = wf[jnr];
169 weight_product = weight_cg1*weight_cg2;
/* Three-way split on weight_product: below ALMOST_ZERO, at or above
 * ALMOST_ONE (the "skip" comment sits under this branch), or in between,
 * where the scaling factor is hybscal = 1.0 - weight_product. */
170 if (weight_product < ALMOST_ZERO) {
173 else if (weight_product >= ALMOST_ONE)
175 /* force is zero, skip this molecule */
180 hybscal = 1.0 - weight_product;
/* Squared distances for all nine site pairs. */
195 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
199 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
203 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
207 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
211 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
215 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
219 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
223 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
227 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances via 1.0/sqrt() (the file header lists software invsqrt as off). */
228 rinv11 = 1.0/sqrt(rsq11);
229 rinv12 = 1.0/sqrt(rsq12);
230 rinv13 = 1.0/sqrt(rsq13);
231 rinv21 = 1.0/sqrt(rsq21);
232 rinv22 = 1.0/sqrt(rsq22);
233 rinv23 = 1.0/sqrt(rsq23);
234 rinv31 = 1.0/sqrt(rsq31);
235 rinv32 = 1.0/sqrt(rsq32);
236 rinv33 = 1.0/sqrt(rsq33);
/* The same visible pattern repeats for each of the nine site pairs:
 * Geps and Heps2 come from VFtab[nnn+2] and VFtab[nnn+3], FF = Fp+Geps+2*Heps2,
 * the pair potential vcoul is added to vctot, and the scalar force is
 * -(fijC*tabscale) times the pair's inverse distance. */
/* Site pair 1-1 (uses rinv11). */
246 Geps = eps*VFtab[nnn+2];
247 Heps2 = eps2*VFtab[nnn+3];
250 FF = Fp+Geps+2.0*Heps2;
253 vctot = vctot + vcoul;
254 fscal = -((fijC)*tabscale)*rinv11;
/* j-site 1 accumulator starts from the stored force minus this pair's (tx,ty,tz). */
262 fjx1 = faction[j3+0] - tx;
263 fjy1 = faction[j3+1] - ty;
264 fjz1 = faction[j3+2] - tz;
/* Site pair 1-2 (uses rinv12). */
274 Geps = eps*VFtab[nnn+2];
275 Heps2 = eps2*VFtab[nnn+3];
278 FF = Fp+Geps+2.0*Heps2;
281 vctot = vctot + vcoul;
282 fscal = -((fijC)*tabscale)*rinv12;
/* j-site 2 accumulator starts from the stored force minus this pair's (tx,ty,tz). */
290 fjx2 = faction[j3+3] - tx;
291 fjy2 = faction[j3+4] - ty;
292 fjz2 = faction[j3+5] - tz;
/* Site pair 1-3 (uses rinv13). */
302 Geps = eps*VFtab[nnn+2];
303 Heps2 = eps2*VFtab[nnn+3];
306 FF = Fp+Geps+2.0*Heps2;
309 vctot = vctot + vcoul;
310 fscal = -((fijC)*tabscale)*rinv13;
/* j-site 3 accumulator starts from the stored force minus this pair's (tx,ty,tz). */
318 fjx3 = faction[j3+6] - tx;
319 fjy3 = faction[j3+7] - ty;
320 fjz3 = faction[j3+8] - tz;
/* Site pair 2-1 (uses rinv21). */
330 Geps = eps*VFtab[nnn+2];
331 Heps2 = eps2*VFtab[nnn+3];
334 FF = Fp+Geps+2.0*Heps2;
337 vctot = vctot + vcoul;
338 fscal = -((fijC)*tabscale)*rinv21;
/* Site pair 2-2 (uses rinv22). */
358 Geps = eps*VFtab[nnn+2];
359 Heps2 = eps2*VFtab[nnn+3];
362 FF = Fp+Geps+2.0*Heps2;
365 vctot = vctot + vcoul;
366 fscal = -((fijC)*tabscale)*rinv22;
/* Site pair 2-3 (uses rinv23). */
386 Geps = eps*VFtab[nnn+2];
387 Heps2 = eps2*VFtab[nnn+3];
390 FF = Fp+Geps+2.0*Heps2;
393 vctot = vctot + vcoul;
394 fscal = -((fijC)*tabscale)*rinv23;
/* Site pair 3-1 (uses rinv31). */
414 Geps = eps*VFtab[nnn+2];
415 Heps2 = eps2*VFtab[nnn+3];
418 FF = Fp+Geps+2.0*Heps2;
421 vctot = vctot + vcoul;
422 fscal = -((fijC)*tabscale)*rinv31;
/* Store the j-site 1 force back to faction after subtracting this pair's (tx,ty,tz). */
430 faction[j3+0] = fjx1 - tx;
431 faction[j3+1] = fjy1 - ty;
432 faction[j3+2] = fjz1 - tz;
/* Site pair 3-2 (uses rinv32). */
442 Geps = eps*VFtab[nnn+2];
443 Heps2 = eps2*VFtab[nnn+3];
446 FF = Fp+Geps+2.0*Heps2;
449 vctot = vctot + vcoul;
450 fscal = -((fijC)*tabscale)*rinv32;
/* Store the j-site 2 force back to faction after subtracting this pair's (tx,ty,tz). */
458 faction[j3+3] = fjx2 - tx;
459 faction[j3+4] = fjy2 - ty;
460 faction[j3+5] = fjz2 - tz;
/* Site pair 3-3 (uses rinv33). */
470 Geps = eps*VFtab[nnn+2];
471 Heps2 = eps2*VFtab[nnn+3];
474 FF = Fp+Geps+2.0*Heps2;
477 vctot = vctot + vcoul;
478 fscal = -((fijC)*tabscale)*rinv33;
/* Store the j-site 3 force back to faction after subtracting this pair's (tx,ty,tz). */
486 faction[j3+6] = fjx3 - tx;
487 faction[j3+7] = fjy3 - ty;
488 faction[j3+8] = fjz3 - tz;
/* Add the accumulated i-site forces to faction at offset ii3. */
491 faction[ii3+0] = faction[ii3+0] + fix1;
492 faction[ii3+1] = faction[ii3+1] + fiy1;
493 faction[ii3+2] = faction[ii3+2] + fiz1;
494 faction[ii3+3] = faction[ii3+3] + fix2;
495 faction[ii3+4] = faction[ii3+4] + fiy2;
496 faction[ii3+5] = faction[ii3+5] + fiz2;
497 faction[ii3+6] = faction[ii3+6] + fix3;
498 faction[ii3+7] = faction[ii3+7] + fiy3;
499 faction[ii3+8] = faction[ii3+8] + fiz3;
/* Add the per-component sum over the three i-sites to fshift at offset is3. */
500 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
501 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
502 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Add the accumulated potential vctot to Vc[ggid]. */
504 Vc[ggid] = Vc[ggid] + vctot;
/* Count the inner-loop iterations performed for this outer entry. */
505 ninner = ninner + nj1 - nj0;
/* Count the outer-loop iterations performed for this chunk. */
508 nouter = nouter + nn1 - nn0;
521 * Gromacs nonbonded kernel nb_kernel302_adress_ex
522 * Coulomb interaction: Tabulated
523 * VdW interaction: Not calculated
524 * water optimization: pairs of SPC/TIP3P interactions
525 * Calculate forces: yes
/*
 * nb_kernel302_adress_ex: per the generated header this kernel uses tabulated
 * Coulomb, no VdW, and handles pairs of three-site waters (3 x 3 site pairs),
 * computing forces.
 * NOTE(review): only part of this function is visible in this excerpt (the
 * parameter list and several statements are not shown); the comments below
 * describe the visible lines only.
 */
527 void nb_kernel302_adress_ex(
/* Scalar settings and loop bookkeeping. */
561 int nri,ntype,nthreads;
562 real facel,krf,crf,tabscale,gbtabscale;
563 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
564 int nn0,nn1,nouter,ninner;
/* Temporaries for the table lookup. */
570 real Y,F,Geps,Heps2,Fp,VV;
/* Coordinates and force accumulators for the three i-sites. */
573 real ix1,iy1,iz1,fix1,fiy1,fiz1;
574 real ix2,iy2,iz2,fix2,fiy2,fiz2;
575 real ix3,iy3,iz3,fix3,fiy3,fiz3;
/* Coordinates and force accumulators for the three j-sites. */
576 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
577 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
578 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* Distance vector, squared distance and inverse distance for each of the
 * nine i-site/j-site pairs (suffix = i-site index, j-site index). */
579 real dx11,dy11,dz11,rsq11,rinv11;
580 real dx12,dy12,dz12,rsq12,rinv12;
581 real dx13,dy13,dz13,rsq13,rinv13;
582 real dx21,dy21,dz21,rsq21,rinv21;
583 real dx22,dy22,dz22,rsq22,rinv22;
584 real dx23,dy23,dz23,rsq23,rinv23;
585 real dx31,dy31,dz31,rsq31,rinv31;
586 real dx32,dy32,dz32,rsq32,rinv32;
587 real dx33,dy33,dz33,rsq33,rinv33;
/* Charges and charge products (presumably O = oxygen, H = hydrogen -- confirm). */
588 real qO,qH,qqOO,qqOH,qqHH;
/* Weights of the two interacting groups and their product. */
589 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar inputs that are passed by pointer into locals. */
594 nthreads = *p_nthreads;
598 tabscale = *p_tabscale;
/* With GMX_THREAD_SHM_FDECOMP the upper bound nn1 of this thread's chunk is
 * computed between locking and unlocking mtx; the chunk size is
 * (nri-nn0)/(2*nthreads)+10 outer entries. */
611 #ifdef GMX_THREAD_SHM_FDECOMP
612 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
614 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
616 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries n in [nn0, nn1). */
623 for(n=nn0; (n<nn1); n++)
/* Shift vector components, read from shiftvec at offset is3. */
627 shY = shiftvec[is3+1];
628 shZ = shiftvec[is3+2];
/* Shifted coordinates of the three i-sites: nine consecutive entries of pos
 * starting at ii3. */
633 ix1 = shX + pos[ii3+0];
634 iy1 = shY + pos[ii3+1];
635 iz1 = shZ + pos[ii3+2];
636 ix2 = shX + pos[ii3+3];
637 iy2 = shY + pos[ii3+4];
638 iz2 = shZ + pos[ii3+5];
639 ix3 = shX + pos[ii3+6];
640 iy3 = shY + pos[ii3+7];
641 iz3 = shZ + pos[ii3+8];
/* Inner loop over entries k in [nj0, nj1). */
654 for(k=nj0; (k<nj1); k++)
/* Weight of the j group is wf[jnr]; weight_cg1 is set on a line not shown here. */
657 weight_cg2 = wf[jnr];
658 weight_product = weight_cg1*weight_cg2;
/* Three-way split on weight_product: below ALMOST_ZERO (the "skip" comment
 * sits under this branch), at or above ALMOST_ONE, or in between, where the
 * scaling factor is hybscal = weight_product. */
659 if (weight_product < ALMOST_ZERO) {
660 /* force is zero, skip this molecule */
663 else if (weight_product >= ALMOST_ONE)
669 hybscal = weight_product;
/* Squared distances for all nine site pairs. */
684 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
688 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
692 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
696 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
700 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
704 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
708 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
712 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
716 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances via 1.0/sqrt() (the file header lists software invsqrt as off). */
717 rinv11 = 1.0/sqrt(rsq11);
718 rinv12 = 1.0/sqrt(rsq12);
719 rinv13 = 1.0/sqrt(rsq13);
720 rinv21 = 1.0/sqrt(rsq21);
721 rinv22 = 1.0/sqrt(rsq22);
722 rinv23 = 1.0/sqrt(rsq23);
723 rinv31 = 1.0/sqrt(rsq31);
724 rinv32 = 1.0/sqrt(rsq32);
725 rinv33 = 1.0/sqrt(rsq33);
/* The same visible pattern repeats for each of the nine site pairs:
 * Geps and Heps2 come from VFtab[nnn+2] and VFtab[nnn+3], FF = Fp+Geps+2*Heps2,
 * the pair potential vcoul is added to vctot, and the scalar force is
 * -(fijC*tabscale) times the pair's inverse distance. */
/* Site pair 1-1 (uses rinv11). */
735 Geps = eps*VFtab[nnn+2];
736 Heps2 = eps2*VFtab[nnn+3];
739 FF = Fp+Geps+2.0*Heps2;
742 vctot = vctot + vcoul;
743 fscal = -((fijC)*tabscale)*rinv11;
/* j-site 1 accumulator starts from the stored force minus this pair's (tx,ty,tz). */
751 fjx1 = faction[j3+0] - tx;
752 fjy1 = faction[j3+1] - ty;
753 fjz1 = faction[j3+2] - tz;
/* Site pair 1-2 (uses rinv12). */
763 Geps = eps*VFtab[nnn+2];
764 Heps2 = eps2*VFtab[nnn+3];
767 FF = Fp+Geps+2.0*Heps2;
770 vctot = vctot + vcoul;
771 fscal = -((fijC)*tabscale)*rinv12;
/* j-site 2 accumulator starts from the stored force minus this pair's (tx,ty,tz). */
779 fjx2 = faction[j3+3] - tx;
780 fjy2 = faction[j3+4] - ty;
781 fjz2 = faction[j3+5] - tz;
/* Site pair 1-3 (uses rinv13). */
791 Geps = eps*VFtab[nnn+2];
792 Heps2 = eps2*VFtab[nnn+3];
795 FF = Fp+Geps+2.0*Heps2;
798 vctot = vctot + vcoul;
799 fscal = -((fijC)*tabscale)*rinv13;
/* j-site 3 accumulator starts from the stored force minus this pair's (tx,ty,tz). */
807 fjx3 = faction[j3+6] - tx;
808 fjy3 = faction[j3+7] - ty;
809 fjz3 = faction[j3+8] - tz;
/* Site pair 2-1 (uses rinv21). */
819 Geps = eps*VFtab[nnn+2];
820 Heps2 = eps2*VFtab[nnn+3];
823 FF = Fp+Geps+2.0*Heps2;
826 vctot = vctot + vcoul;
827 fscal = -((fijC)*tabscale)*rinv21;
/* Site pair 2-2 (uses rinv22). */
847 Geps = eps*VFtab[nnn+2];
848 Heps2 = eps2*VFtab[nnn+3];
851 FF = Fp+Geps+2.0*Heps2;
854 vctot = vctot + vcoul;
855 fscal = -((fijC)*tabscale)*rinv22;
/* Site pair 2-3 (uses rinv23). */
875 Geps = eps*VFtab[nnn+2];
876 Heps2 = eps2*VFtab[nnn+3];
879 FF = Fp+Geps+2.0*Heps2;
882 vctot = vctot + vcoul;
883 fscal = -((fijC)*tabscale)*rinv23;
/* Site pair 3-1 (uses rinv31). */
903 Geps = eps*VFtab[nnn+2];
904 Heps2 = eps2*VFtab[nnn+3];
907 FF = Fp+Geps+2.0*Heps2;
910 vctot = vctot + vcoul;
911 fscal = -((fijC)*tabscale)*rinv31;
/* Store the j-site 1 force back to faction after subtracting this pair's (tx,ty,tz). */
919 faction[j3+0] = fjx1 - tx;
920 faction[j3+1] = fjy1 - ty;
921 faction[j3+2] = fjz1 - tz;
/* Site pair 3-2 (uses rinv32). */
931 Geps = eps*VFtab[nnn+2];
932 Heps2 = eps2*VFtab[nnn+3];
935 FF = Fp+Geps+2.0*Heps2;
938 vctot = vctot + vcoul;
939 fscal = -((fijC)*tabscale)*rinv32;
/* Store the j-site 2 force back to faction after subtracting this pair's (tx,ty,tz). */
947 faction[j3+3] = fjx2 - tx;
948 faction[j3+4] = fjy2 - ty;
949 faction[j3+5] = fjz2 - tz;
/* Site pair 3-3 (uses rinv33). */
959 Geps = eps*VFtab[nnn+2];
960 Heps2 = eps2*VFtab[nnn+3];
963 FF = Fp+Geps+2.0*Heps2;
966 vctot = vctot + vcoul;
967 fscal = -((fijC)*tabscale)*rinv33;
/* Store the j-site 3 force back to faction after subtracting this pair's (tx,ty,tz). */
975 faction[j3+6] = fjx3 - tx;
976 faction[j3+7] = fjy3 - ty;
977 faction[j3+8] = fjz3 - tz;
/* Add the accumulated i-site forces to faction at offset ii3. */
980 faction[ii3+0] = faction[ii3+0] + fix1;
981 faction[ii3+1] = faction[ii3+1] + fiy1;
982 faction[ii3+2] = faction[ii3+2] + fiz1;
983 faction[ii3+3] = faction[ii3+3] + fix2;
984 faction[ii3+4] = faction[ii3+4] + fiy2;
985 faction[ii3+5] = faction[ii3+5] + fiz2;
986 faction[ii3+6] = faction[ii3+6] + fix3;
987 faction[ii3+7] = faction[ii3+7] + fiy3;
988 faction[ii3+8] = faction[ii3+8] + fiz3;
/* Add the per-component sum over the three i-sites to fshift at offset is3. */
989 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
990 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
991 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Add the accumulated potential vctot to Vc[ggid]. */
993 Vc[ggid] = Vc[ggid] + vctot;
/* Count the inner-loop iterations performed for this outer entry. */
994 ninner = ninner + nj1 - nj0;
/* Count the outer-loop iterations performed for this chunk. */
997 nouter = nouter + nn1 - nn0;
/* Report the iteration counts to the caller through the output pointers. */
1001 *outeriter = nouter;
1002 *inneriter = ninner;