2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Lower cutoff that the kernels compare weight_product against (weight_product < ALMOST_ZERO). */
#define ALMOST_ZERO 1e-30
/*
 * Upper cutoff that the kernels compare weight_product against
 * (weight_product >= ALMOST_ONE). The replacement list is fully
 * parenthesized so the macro behaves as one value inside any larger
 * expression, e.g. ALMOST_ONE * x.
 */
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel134_adress.h"
32 * Gromacs nonbonded kernel nb_kernel134_adress_cg
33 * Coulomb interaction: Normal Coulomb
34 * VdW interaction: Tabulated
35 * water optimization: pairs of TIP4P interactions
36 * Calculate forces: yes
/*
 * "cg" variant of kernel 134. From the statements visible here it:
 *  - iterates outer entries n in [nn0, nn1) and inner entries k in [nj0, nj1);
 *  - builds four i-site positions from pos[ii3+0..11] plus a shift vector;
 *  - branches on weight_product = weight_cg1 * wf[jnr] and sets
 *    hybscal = 1.0 - weight_product;
 *  - subtracts pair forces from faction[j3+0..11], adds the i-site forces to
 *    faction[ii3+0..11] and fshift[is3..is3+2], and adds vctot / Vvdwtot to
 *    Vc[ggid] / Vvdw[ggid].
 * NOTE(review): the parameter list and a number of statements are not
 * visible in this view; confirm details against the complete generated file.
 */
38 void nb_kernel134_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
85 real Y,F,Geps,Heps2,Fp,VV;
88 real ix1,iy1,iz1,fix1,fiy1,fiz1;
89 real ix2,iy2,iz2,fix2,fiy2,fiz2;
90 real ix3,iy3,iz3,fix3,fiy3,fiz3;
91 real ix4,iy4,iz4,fix4,fiy4,fiz4;
93 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
94 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
95 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
96 real dx11,dy11,dz11,rsq11,rinv11;
97 real dx22,dy22,dz22,rsq22,rinv22;
98 real dx23,dy23,dz23,rsq23,rinv23;
99 real dx24,dy24,dz24,rsq24,rinv24;
100 real dx32,dy32,dz32,rsq32,rinv32;
101 real dx33,dy33,dz33,rsq33,rinv33;
102 real dx34,dy34,dz34,rsq34,rinv34;
103 real dx42,dy42,dz42,rsq42,rinv42;
104 real dx43,dy43,dz43,rsq43,rinv43;
105 real dx44,dy44,dz44,rsq44,rinv44;
106 real qH,qM,qqMM,qqMH,qqHH;
108 real weight_cg1, weight_cg2, weight_product;
/* scalar inputs arrive through pointers and are copied into locals */
113 nthreads = *p_nthreads;
117 tabscale = *p_tabscale;
124 tj = 2*(ntype+1)*type[ii];
126 c12 = vdwparam[tj+1];
/* with GMX_THREAD_SHM_FDECOMP defined, the chunk bounds are picked while holding the mutex mtx */
133 #ifdef GMX_THREAD_SHM_FDECOMP
134 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* chunk end: nn0 plus (nri-nn0)/(2*nthreads), plus 10 */
136 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
138 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
145 for(n=nn0; (n<nn1); n++)
149 shY = shiftvec[is3+1];
150 shZ = shiftvec[is3+2];
/* i-site coordinates: shift (shX,shY,shZ) added to the 12 consecutive values starting at pos[ii3] */
155 ix1 = shX + pos[ii3+0];
156 iy1 = shY + pos[ii3+1];
157 iz1 = shZ + pos[ii3+2];
158 ix2 = shX + pos[ii3+3];
159 iy2 = shY + pos[ii3+4];
160 iz2 = shZ + pos[ii3+5];
161 ix3 = shX + pos[ii3+6];
162 iy3 = shY + pos[ii3+7];
163 iz3 = shZ + pos[ii3+8];
164 ix4 = shX + pos[ii3+9];
165 iy4 = shY + pos[ii3+10];
166 iz4 = shZ + pos[ii3+11];
183 for(k=nj0; (k<nj1); k++)
/* the product of weight_cg1 and wf[jnr] decides how this j entry is handled */
186 weight_cg2 = wf[jnr];
187 weight_product = weight_cg1*weight_cg2;
188 if (weight_product < ALMOST_ZERO) {
191 else if (weight_product >= ALMOST_ONE)
193 /* force is zero, skip this molecule */
/* in this variant hybscal is the complement of the weight product */
198 hybscal = 1.0 - weight_product;
/* squared distances: site pair 1-1, then the nine pairs among sites 2, 3 and 4 */
216 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
220 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
224 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
228 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
232 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
236 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
240 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
244 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
248 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
252 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
/* inverse distances via 1.0/sqrt() for each of the ten pairs */
253 rinv11 = 1.0/sqrt(rsq11);
254 rinv22 = 1.0/sqrt(rsq22);
255 rinv23 = 1.0/sqrt(rsq23);
256 rinv24 = 1.0/sqrt(rsq24);
257 rinv32 = 1.0/sqrt(rsq32);
258 rinv33 = 1.0/sqrt(rsq33);
259 rinv34 = 1.0/sqrt(rsq34);
260 rinv42 = 1.0/sqrt(rsq42);
261 rinv43 = 1.0/sqrt(rsq43);
262 rinv44 = 1.0/sqrt(rsq44);
/* table terms read from VFtab[nnn+2] and VFtab[nnn+3]; this pattern appears twice */
271 Geps = eps*VFtab[nnn+2];
272 Heps2 = eps2*VFtab[nnn+3];
275 FF = Fp+Geps+2.0*Heps2;
281 Geps = eps*VFtab[nnn+2];
282 Heps2 = eps2*VFtab[nnn+3];
285 FF = Fp+Geps+2.0*Heps2;
288 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
/* scalar force for pair 1-1: -(fijD+fijR)*tabscale*rinv11 */
289 fscal = -((fijD+fijR)*tabscale)*rinv11;
/* j-site 1 force is updated directly in faction[j3+0..2] */
297 faction[j3+0] = faction[j3+0] - tx;
298 faction[j3+1] = faction[j3+1] - ty;
299 faction[j3+2] = faction[j3+2] - tz;
/* remaining pairs: scalar force is vcoul*rinvsq, with rinvsq the squared inverse distance */
301 rinvsq = rinv22*rinv22;
304 fscal = (vcoul)*rinvsq;
/* j-site 2..4 forces are held in locals (fjx2.. fjz4) and written back after the 4-x pairs below */
312 fjx2 = faction[j3+3] - tx;
313 fjy2 = faction[j3+4] - ty;
314 fjz2 = faction[j3+5] - tz;
316 rinvsq = rinv23*rinv23;
319 fscal = (vcoul)*rinvsq;
327 fjx3 = faction[j3+6] - tx;
328 fjy3 = faction[j3+7] - ty;
329 fjz3 = faction[j3+8] - tz;
331 rinvsq = rinv24*rinv24;
334 fscal = (vcoul)*rinvsq;
342 fjx4 = faction[j3+9] - tx;
343 fjy4 = faction[j3+10] - ty;
344 fjz4 = faction[j3+11] - tz;
346 rinvsq = rinv32*rinv32;
349 fscal = (vcoul)*rinvsq;
361 rinvsq = rinv33*rinv33;
364 fscal = (vcoul)*rinvsq;
376 rinvsq = rinv34*rinv34;
379 fscal = (vcoul)*rinvsq;
391 rinvsq = rinv42*rinv42;
394 fscal = (vcoul)*rinvsq;
402 faction[j3+3] = fjx2 - tx;
403 faction[j3+4] = fjy2 - ty;
404 faction[j3+5] = fjz2 - tz;
406 rinvsq = rinv43*rinv43;
409 fscal = (vcoul)*rinvsq;
417 faction[j3+6] = fjx3 - tx;
418 faction[j3+7] = fjy3 - ty;
419 faction[j3+8] = fjz3 - tz;
421 rinvsq = rinv44*rinv44;
424 fscal = (vcoul)*rinvsq;
432 faction[j3+9] = fjx4 - tx;
433 faction[j3+10] = fjy4 - ty;
434 faction[j3+11] = fjz4 - tz;
/* add the accumulated i-site forces to faction[ii3+0..11] */
437 faction[ii3+0] = faction[ii3+0] + fix1;
438 faction[ii3+1] = faction[ii3+1] + fiy1;
439 faction[ii3+2] = faction[ii3+2] + fiz1;
440 faction[ii3+3] = faction[ii3+3] + fix2;
441 faction[ii3+4] = faction[ii3+4] + fiy2;
442 faction[ii3+5] = faction[ii3+5] + fiz2;
443 faction[ii3+6] = faction[ii3+6] + fix3;
444 faction[ii3+7] = faction[ii3+7] + fiy3;
445 faction[ii3+8] = faction[ii3+8] + fiz3;
446 faction[ii3+9] = faction[ii3+9] + fix4;
447 faction[ii3+10] = faction[ii3+10] + fiy4;
448 faction[ii3+11] = faction[ii3+11] + fiz4;
/* fshift receives the per-component sum over the four i sites */
449 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
450 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
451 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* energy totals are added to the entries selected by ggid */
453 Vc[ggid] = Vc[ggid] + vctot;
454 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* iteration counters: inner count grows by nj1-nj0, outer count by nn1-nn0 */
455 ninner = ninner + nj1 - nj0;
458 nouter = nouter + nn1 - nn0;
471 * Gromacs nonbonded kernel nb_kernel134_adress_ex
472 * Coulomb interaction: Normal Coulomb
473 * VdW interaction: Tabulated
474 * water optimization: pairs of TIP4P interactions
475 * Calculate forces: yes
/*
 * "ex" variant of kernel 134. From the statements visible here it:
 *  - iterates outer entries n in [nn0, nn1) and inner entries k in [nj0, nj1);
 *  - builds four i-site positions from pos[ii3+0..11] plus a shift vector;
 *  - branches on weight_product = weight_cg1 * wf[jnr] and sets
 *    hybscal = weight_product;
 *  - limits each pair's scalar force to force_cap when force_cap is positive;
 *  - subtracts pair forces from faction[j3+0..11], adds the i-site forces to
 *    faction[ii3+0..11] and fshift[is3..is3+2], and adds vctot / Vvdwtot to
 *    Vc[ggid] / Vvdw[ggid].
 * NOTE(review): the parameter list and a number of statements are not
 * visible in this view; confirm details against the complete generated file.
 */
477 void nb_kernel134_adress_ex(
511 int nri,ntype,nthreads;
512 real facel,krf,crf,tabscale,gbtabscale;
513 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
514 int nn0,nn1,nouter,ninner;
524 real Y,F,Geps,Heps2,Fp,VV;
527 real ix1,iy1,iz1,fix1,fiy1,fiz1;
528 real ix2,iy2,iz2,fix2,fiy2,fiz2;
529 real ix3,iy3,iz3,fix3,fiy3,fiz3;
530 real ix4,iy4,iz4,fix4,fiy4,fiz4;
532 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
533 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
534 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
535 real dx11,dy11,dz11,rsq11,rinv11;
536 real dx22,dy22,dz22,rsq22,rinv22;
537 real dx23,dy23,dz23,rsq23,rinv23;
538 real dx24,dy24,dz24,rsq24,rinv24;
539 real dx32,dy32,dz32,rsq32,rinv32;
540 real dx33,dy33,dz33,rsq33,rinv33;
541 real dx34,dy34,dz34,rsq34,rinv34;
542 real dx42,dy42,dz42,rsq42,rinv42;
543 real dx43,dy43,dz43,rsq43,rinv43;
544 real dx44,dy44,dz44,rsq44,rinv44;
545 real qH,qM,qqMM,qqMH,qqHH;
547 real weight_cg1, weight_cg2, weight_product;
/* scalar inputs arrive through pointers and are copied into locals */
552 nthreads = *p_nthreads;
556 tabscale = *p_tabscale;
563 tj = 2*(ntype+1)*type[ii];
565 c12 = vdwparam[tj+1];
/* with GMX_THREAD_SHM_FDECOMP defined, the chunk bounds are picked while holding the mutex mtx */
572 #ifdef GMX_THREAD_SHM_FDECOMP
573 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* chunk end: nn0 plus (nri-nn0)/(2*nthreads), plus 10 */
575 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
577 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
584 for(n=nn0; (n<nn1); n++)
588 shY = shiftvec[is3+1];
589 shZ = shiftvec[is3+2];
/* i-site coordinates: shift (shX,shY,shZ) added to the 12 consecutive values starting at pos[ii3] */
594 ix1 = shX + pos[ii3+0];
595 iy1 = shY + pos[ii3+1];
596 iz1 = shZ + pos[ii3+2];
597 ix2 = shX + pos[ii3+3];
598 iy2 = shY + pos[ii3+4];
599 iz2 = shZ + pos[ii3+5];
600 ix3 = shX + pos[ii3+6];
601 iy3 = shY + pos[ii3+7];
602 iz3 = shZ + pos[ii3+8];
603 ix4 = shX + pos[ii3+9];
604 iy4 = shY + pos[ii3+10];
605 iz4 = shZ + pos[ii3+11];
622 for(k=nj0; (k<nj1); k++)
/* the product of weight_cg1 and wf[jnr] decides how this j entry is handled */
625 weight_cg2 = wf[jnr];
626 weight_product = weight_cg1*weight_cg2;
627 if (weight_product < ALMOST_ZERO) {
628 /* force is zero, skip this molecule */
631 else if (weight_product >= ALMOST_ONE)
/* in this variant hybscal is the weight product itself */
637 hybscal = weight_product;
/* squared distances: site pair 1-1, then the nine pairs among sites 2, 3 and 4 */
655 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
659 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
663 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
667 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
671 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
675 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
679 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
683 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
687 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
691 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
/* inverse distances via 1.0/sqrt() for each of the ten pairs */
692 rinv11 = 1.0/sqrt(rsq11);
693 rinv22 = 1.0/sqrt(rsq22);
694 rinv23 = 1.0/sqrt(rsq23);
695 rinv24 = 1.0/sqrt(rsq24);
696 rinv32 = 1.0/sqrt(rsq32);
697 rinv33 = 1.0/sqrt(rsq33);
698 rinv34 = 1.0/sqrt(rsq34);
699 rinv42 = 1.0/sqrt(rsq42);
700 rinv43 = 1.0/sqrt(rsq43);
701 rinv44 = 1.0/sqrt(rsq44);
/* table terms read from VFtab[nnn+2] and VFtab[nnn+3]; this pattern appears twice */
710 Geps = eps*VFtab[nnn+2];
711 Heps2 = eps2*VFtab[nnn+3];
714 FF = Fp+Geps+2.0*Heps2;
720 Geps = eps*VFtab[nnn+2];
721 Heps2 = eps2*VFtab[nnn+3];
724 FF = Fp+Geps+2.0*Heps2;
727 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
/* scalar force for pair 1-1: -(fijD+fijR)*tabscale*rinv11 */
728 fscal = -((fijD+fijR)*tabscale)*rinv11;
/* when force_cap is positive, limit |fscal| to force_cap while keeping its sign; the same clamp follows every fscal below */
730 if(force_cap>0 && (fabs(fscal)> force_cap)){
731 fscal=force_cap*fscal/fabs(fscal);
/* j-site 1 force is updated directly in faction[j3+0..2] */
739 faction[j3+0] = faction[j3+0] - tx;
740 faction[j3+1] = faction[j3+1] - ty;
741 faction[j3+2] = faction[j3+2] - tz;
/* remaining pairs: scalar force is vcoul*rinvsq, with rinvsq the squared inverse distance */
743 rinvsq = rinv22*rinv22;
746 fscal = (vcoul)*rinvsq;
748 if(force_cap>0 && (fabs(fscal)> force_cap)){
749 fscal=force_cap*fscal/fabs(fscal);
/* j-site 2..4 forces are held in locals (fjx2.. fjz4) and written back after the 4-x pairs below */
757 fjx2 = faction[j3+3] - tx;
758 fjy2 = faction[j3+4] - ty;
759 fjz2 = faction[j3+5] - tz;
761 rinvsq = rinv23*rinv23;
764 fscal = (vcoul)*rinvsq;
766 if(force_cap>0 && (fabs(fscal)> force_cap)){
767 fscal=force_cap*fscal/fabs(fscal);
775 fjx3 = faction[j3+6] - tx;
776 fjy3 = faction[j3+7] - ty;
777 fjz3 = faction[j3+8] - tz;
779 rinvsq = rinv24*rinv24;
782 fscal = (vcoul)*rinvsq;
784 if(force_cap>0 && (fabs(fscal)> force_cap)){
785 fscal=force_cap*fscal/fabs(fscal);
793 fjx4 = faction[j3+9] - tx;
794 fjy4 = faction[j3+10] - ty;
795 fjz4 = faction[j3+11] - tz;
797 rinvsq = rinv32*rinv32;
800 fscal = (vcoul)*rinvsq;
802 if(force_cap>0 && (fabs(fscal)> force_cap)){
803 fscal=force_cap*fscal/fabs(fscal);
815 rinvsq = rinv33*rinv33;
818 fscal = (vcoul)*rinvsq;
820 if(force_cap>0 && (fabs(fscal)> force_cap)){
821 fscal=force_cap*fscal/fabs(fscal);
833 rinvsq = rinv34*rinv34;
836 fscal = (vcoul)*rinvsq;
838 if(force_cap>0 && (fabs(fscal)> force_cap)){
839 fscal=force_cap*fscal/fabs(fscal);
851 rinvsq = rinv42*rinv42;
854 fscal = (vcoul)*rinvsq;
856 if(force_cap>0 && (fabs(fscal)> force_cap)){
857 fscal=force_cap*fscal/fabs(fscal);
865 faction[j3+3] = fjx2 - tx;
866 faction[j3+4] = fjy2 - ty;
867 faction[j3+5] = fjz2 - tz;
869 rinvsq = rinv43*rinv43;
872 fscal = (vcoul)*rinvsq;
874 if(force_cap>0 && (fabs(fscal)> force_cap)){
875 fscal=force_cap*fscal/fabs(fscal);
883 faction[j3+6] = fjx3 - tx;
884 faction[j3+7] = fjy3 - ty;
885 faction[j3+8] = fjz3 - tz;
887 rinvsq = rinv44*rinv44;
890 fscal = (vcoul)*rinvsq;
892 if(force_cap>0 && (fabs(fscal)> force_cap)){
893 fscal=force_cap*fscal/fabs(fscal);
901 faction[j3+9] = fjx4 - tx;
902 faction[j3+10] = fjy4 - ty;
903 faction[j3+11] = fjz4 - tz;
/* add the accumulated i-site forces to faction[ii3+0..11] */
906 faction[ii3+0] = faction[ii3+0] + fix1;
907 faction[ii3+1] = faction[ii3+1] + fiy1;
908 faction[ii3+2] = faction[ii3+2] + fiz1;
909 faction[ii3+3] = faction[ii3+3] + fix2;
910 faction[ii3+4] = faction[ii3+4] + fiy2;
911 faction[ii3+5] = faction[ii3+5] + fiz2;
912 faction[ii3+6] = faction[ii3+6] + fix3;
913 faction[ii3+7] = faction[ii3+7] + fiy3;
914 faction[ii3+8] = faction[ii3+8] + fiz3;
915 faction[ii3+9] = faction[ii3+9] + fix4;
916 faction[ii3+10] = faction[ii3+10] + fiy4;
917 faction[ii3+11] = faction[ii3+11] + fiz4;
/* fshift receives the per-component sum over the four i sites */
918 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
919 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
920 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* energy totals are added to the entries selected by ggid */
922 Vc[ggid] = Vc[ggid] + vctot;
923 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* iteration counters: inner count grows by nj1-nj0, outer count by nn1-nn0 */
924 ninner = ninner + nj1 - nj0;
927 nouter = nouter + nn1 - nn0;