2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generation this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel234_adress.h"
32 * Gromacs nonbonded kernel nb_kernel234_adress_cg
33 * Coulomb interaction: Reaction field
34 * VdW interaction: Tabulated
35 * water optimization: pairs of TIP4P interactions
36 * Calculate forces: yes
38 void nb_kernel234_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
85 real Y,F,Geps,Heps2,Fp,VV;
89 real ix1,iy1,iz1,fix1,fiy1,fiz1;
90 real ix2,iy2,iz2,fix2,fiy2,fiz2;
91 real ix3,iy3,iz3,fix3,fiy3,fiz3;
92 real ix4,iy4,iz4,fix4,fiy4,fiz4;
94 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
95 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
96 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
97 real dx11,dy11,dz11,rsq11,rinv11;
98 real dx22,dy22,dz22,rsq22,rinv22;
99 real dx23,dy23,dz23,rsq23,rinv23;
100 real dx24,dy24,dz24,rsq24,rinv24;
101 real dx32,dy32,dz32,rsq32,rinv32;
102 real dx33,dy33,dz33,rsq33,rinv33;
103 real dx34,dy34,dz34,rsq34,rinv34;
104 real dx42,dy42,dz42,rsq42,rinv42;
105 real dx43,dy43,dz43,rsq43,rinv43;
106 real dx44,dy44,dz44,rsq44,rinv44;
107 real qH,qM,qqMM,qqMH,qqHH;
109 real weight_cg1, weight_cg2, weight_product;
114 nthreads = *p_nthreads;
118 tabscale = *p_tabscale;
125 tj = 2*(ntype+1)*type[ii];
127 c12 = vdwparam[tj+1];
134 #ifdef GMX_THREAD_SHM_FDECOMP
135 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
137 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
139 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
146 for(n=nn0; (n<nn1); n++)
150 shY = shiftvec[is3+1];
151 shZ = shiftvec[is3+2];
156 ix1 = shX + pos[ii3+0];
157 iy1 = shY + pos[ii3+1];
158 iz1 = shZ + pos[ii3+2];
159 ix2 = shX + pos[ii3+3];
160 iy2 = shY + pos[ii3+4];
161 iz2 = shZ + pos[ii3+5];
162 ix3 = shX + pos[ii3+6];
163 iy3 = shY + pos[ii3+7];
164 iz3 = shZ + pos[ii3+8];
165 ix4 = shX + pos[ii3+9];
166 iy4 = shY + pos[ii3+10];
167 iz4 = shZ + pos[ii3+11];
184 for(k=nj0; (k<nj1); k++)
187 weight_cg2 = wf[jnr];
188 weight_product = weight_cg1*weight_cg2;
189 if (weight_product < ALMOST_ZERO) {
192 else if (weight_product >= ALMOST_ONE)
194 /* force is zero, skip this molecule */
199 hybscal = 1.0 - weight_product;
217 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
221 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
225 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
229 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
233 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
237 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
241 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
245 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
249 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
253 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
254 rinv11 = 1.0/sqrt(rsq11);
255 rinv22 = 1.0/sqrt(rsq22);
256 rinv23 = 1.0/sqrt(rsq23);
257 rinv24 = 1.0/sqrt(rsq24);
258 rinv32 = 1.0/sqrt(rsq32);
259 rinv33 = 1.0/sqrt(rsq33);
260 rinv34 = 1.0/sqrt(rsq34);
261 rinv42 = 1.0/sqrt(rsq42);
262 rinv43 = 1.0/sqrt(rsq43);
263 rinv44 = 1.0/sqrt(rsq44);
272 Geps = eps*VFtab[nnn+2];
273 Heps2 = eps2*VFtab[nnn+3];
276 FF = Fp+Geps+2.0*Heps2;
282 Geps = eps*VFtab[nnn+2];
283 Heps2 = eps2*VFtab[nnn+3];
286 FF = Fp+Geps+2.0*Heps2;
289 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
290 fscal = -((fijD+fijR)*tabscale)*rinv11;
298 faction[j3+0] = faction[j3+0] - tx;
299 faction[j3+1] = faction[j3+1] - ty;
300 faction[j3+2] = faction[j3+2] - tz;
302 rinvsq = rinv22*rinv22;
304 vcoul = qq*(rinv22+krsq-crf);
306 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
314 fjx2 = faction[j3+3] - tx;
315 fjy2 = faction[j3+4] - ty;
316 fjz2 = faction[j3+5] - tz;
318 rinvsq = rinv23*rinv23;
320 vcoul = qq*(rinv23+krsq-crf);
322 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
330 fjx3 = faction[j3+6] - tx;
331 fjy3 = faction[j3+7] - ty;
332 fjz3 = faction[j3+8] - tz;
334 rinvsq = rinv24*rinv24;
336 vcoul = qq*(rinv24+krsq-crf);
338 fscal = (qq*(rinv24-2.0*krsq))*rinvsq;
346 fjx4 = faction[j3+9] - tx;
347 fjy4 = faction[j3+10] - ty;
348 fjz4 = faction[j3+11] - tz;
350 rinvsq = rinv32*rinv32;
352 vcoul = qq*(rinv32+krsq-crf);
354 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
366 rinvsq = rinv33*rinv33;
368 vcoul = qq*(rinv33+krsq-crf);
370 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
382 rinvsq = rinv34*rinv34;
384 vcoul = qq*(rinv34+krsq-crf);
386 fscal = (qq*(rinv34-2.0*krsq))*rinvsq;
398 rinvsq = rinv42*rinv42;
400 vcoul = qq*(rinv42+krsq-crf);
402 fscal = (qq*(rinv42-2.0*krsq))*rinvsq;
410 faction[j3+3] = fjx2 - tx;
411 faction[j3+4] = fjy2 - ty;
412 faction[j3+5] = fjz2 - tz;
414 rinvsq = rinv43*rinv43;
416 vcoul = qq*(rinv43+krsq-crf);
418 fscal = (qq*(rinv43-2.0*krsq))*rinvsq;
426 faction[j3+6] = fjx3 - tx;
427 faction[j3+7] = fjy3 - ty;
428 faction[j3+8] = fjz3 - tz;
430 rinvsq = rinv44*rinv44;
432 vcoul = qq*(rinv44+krsq-crf);
434 fscal = (qq*(rinv44-2.0*krsq))*rinvsq;
442 faction[j3+9] = fjx4 - tx;
443 faction[j3+10] = fjy4 - ty;
444 faction[j3+11] = fjz4 - tz;
447 faction[ii3+0] = faction[ii3+0] + fix1;
448 faction[ii3+1] = faction[ii3+1] + fiy1;
449 faction[ii3+2] = faction[ii3+2] + fiz1;
450 faction[ii3+3] = faction[ii3+3] + fix2;
451 faction[ii3+4] = faction[ii3+4] + fiy2;
452 faction[ii3+5] = faction[ii3+5] + fiz2;
453 faction[ii3+6] = faction[ii3+6] + fix3;
454 faction[ii3+7] = faction[ii3+7] + fiy3;
455 faction[ii3+8] = faction[ii3+8] + fiz3;
456 faction[ii3+9] = faction[ii3+9] + fix4;
457 faction[ii3+10] = faction[ii3+10] + fiy4;
458 faction[ii3+11] = faction[ii3+11] + fiz4;
459 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
460 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
461 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
463 Vc[ggid] = Vc[ggid] + vctot;
464 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
465 ninner = ninner + nj1 - nj0;
468 nouter = nouter + nn1 - nn0;
481 * Gromacs nonbonded kernel nb_kernel234_adress_ex
482 * Coulomb interaction: Reaction field
483 * VdW interaction: Tabulated
484 * water optimization: pairs of TIP4P interactions
485 * Calculate forces: yes
487 void nb_kernel234_adress_ex(
521 int nri,ntype,nthreads;
522 real facel,krf,crf,tabscale,gbtabscale;
523 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
524 int nn0,nn1,nouter,ninner;
534 real Y,F,Geps,Heps2,Fp,VV;
538 real ix1,iy1,iz1,fix1,fiy1,fiz1;
539 real ix2,iy2,iz2,fix2,fiy2,fiz2;
540 real ix3,iy3,iz3,fix3,fiy3,fiz3;
541 real ix4,iy4,iz4,fix4,fiy4,fiz4;
543 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
544 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
545 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
546 real dx11,dy11,dz11,rsq11,rinv11;
547 real dx22,dy22,dz22,rsq22,rinv22;
548 real dx23,dy23,dz23,rsq23,rinv23;
549 real dx24,dy24,dz24,rsq24,rinv24;
550 real dx32,dy32,dz32,rsq32,rinv32;
551 real dx33,dy33,dz33,rsq33,rinv33;
552 real dx34,dy34,dz34,rsq34,rinv34;
553 real dx42,dy42,dz42,rsq42,rinv42;
554 real dx43,dy43,dz43,rsq43,rinv43;
555 real dx44,dy44,dz44,rsq44,rinv44;
556 real qH,qM,qqMM,qqMH,qqHH;
558 real weight_cg1, weight_cg2, weight_product;
563 nthreads = *p_nthreads;
567 tabscale = *p_tabscale;
574 tj = 2*(ntype+1)*type[ii];
576 c12 = vdwparam[tj+1];
583 #ifdef GMX_THREAD_SHM_FDECOMP
584 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
586 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
588 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
595 for(n=nn0; (n<nn1); n++)
599 shY = shiftvec[is3+1];
600 shZ = shiftvec[is3+2];
605 ix1 = shX + pos[ii3+0];
606 iy1 = shY + pos[ii3+1];
607 iz1 = shZ + pos[ii3+2];
608 ix2 = shX + pos[ii3+3];
609 iy2 = shY + pos[ii3+4];
610 iz2 = shZ + pos[ii3+5];
611 ix3 = shX + pos[ii3+6];
612 iy3 = shY + pos[ii3+7];
613 iz3 = shZ + pos[ii3+8];
614 ix4 = shX + pos[ii3+9];
615 iy4 = shY + pos[ii3+10];
616 iz4 = shZ + pos[ii3+11];
633 for(k=nj0; (k<nj1); k++)
636 weight_cg2 = wf[jnr];
637 weight_product = weight_cg1*weight_cg2;
638 if (weight_product < ALMOST_ZERO) {
639 /* force is zero, skip this molecule */
642 else if (weight_product >= ALMOST_ONE)
648 hybscal = weight_product;
666 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
670 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
674 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
678 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
682 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
686 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
690 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
694 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
698 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
702 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
703 rinv11 = 1.0/sqrt(rsq11);
704 rinv22 = 1.0/sqrt(rsq22);
705 rinv23 = 1.0/sqrt(rsq23);
706 rinv24 = 1.0/sqrt(rsq24);
707 rinv32 = 1.0/sqrt(rsq32);
708 rinv33 = 1.0/sqrt(rsq33);
709 rinv34 = 1.0/sqrt(rsq34);
710 rinv42 = 1.0/sqrt(rsq42);
711 rinv43 = 1.0/sqrt(rsq43);
712 rinv44 = 1.0/sqrt(rsq44);
721 Geps = eps*VFtab[nnn+2];
722 Heps2 = eps2*VFtab[nnn+3];
725 FF = Fp+Geps+2.0*Heps2;
731 Geps = eps*VFtab[nnn+2];
732 Heps2 = eps2*VFtab[nnn+3];
735 FF = Fp+Geps+2.0*Heps2;
738 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
739 fscal = -((fijD+fijR)*tabscale)*rinv11;
741 if(force_cap>0 && (fabs(fscal)> force_cap)){
742 fscal=force_cap*fscal/fabs(fscal);
750 faction[j3+0] = faction[j3+0] - tx;
751 faction[j3+1] = faction[j3+1] - ty;
752 faction[j3+2] = faction[j3+2] - tz;
754 rinvsq = rinv22*rinv22;
756 vcoul = qq*(rinv22+krsq-crf);
758 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
760 if(force_cap>0 && (fabs(fscal)> force_cap)){
761 fscal=force_cap*fscal/fabs(fscal);
769 fjx2 = faction[j3+3] - tx;
770 fjy2 = faction[j3+4] - ty;
771 fjz2 = faction[j3+5] - tz;
773 rinvsq = rinv23*rinv23;
775 vcoul = qq*(rinv23+krsq-crf);
777 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
779 if(force_cap>0 && (fabs(fscal)> force_cap)){
780 fscal=force_cap*fscal/fabs(fscal);
788 fjx3 = faction[j3+6] - tx;
789 fjy3 = faction[j3+7] - ty;
790 fjz3 = faction[j3+8] - tz;
792 rinvsq = rinv24*rinv24;
794 vcoul = qq*(rinv24+krsq-crf);
796 fscal = (qq*(rinv24-2.0*krsq))*rinvsq;
798 if(force_cap>0 && (fabs(fscal)> force_cap)){
799 fscal=force_cap*fscal/fabs(fscal);
807 fjx4 = faction[j3+9] - tx;
808 fjy4 = faction[j3+10] - ty;
809 fjz4 = faction[j3+11] - tz;
811 rinvsq = rinv32*rinv32;
813 vcoul = qq*(rinv32+krsq-crf);
815 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
817 if(force_cap>0 && (fabs(fscal)> force_cap)){
818 fscal=force_cap*fscal/fabs(fscal);
830 rinvsq = rinv33*rinv33;
832 vcoul = qq*(rinv33+krsq-crf);
834 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
836 if(force_cap>0 && (fabs(fscal)> force_cap)){
837 fscal=force_cap*fscal/fabs(fscal);
849 rinvsq = rinv34*rinv34;
851 vcoul = qq*(rinv34+krsq-crf);
853 fscal = (qq*(rinv34-2.0*krsq))*rinvsq;
855 if(force_cap>0 && (fabs(fscal)> force_cap)){
856 fscal=force_cap*fscal/fabs(fscal);
868 rinvsq = rinv42*rinv42;
870 vcoul = qq*(rinv42+krsq-crf);
872 fscal = (qq*(rinv42-2.0*krsq))*rinvsq;
874 if(force_cap>0 && (fabs(fscal)> force_cap)){
875 fscal=force_cap*fscal/fabs(fscal);
883 faction[j3+3] = fjx2 - tx;
884 faction[j3+4] = fjy2 - ty;
885 faction[j3+5] = fjz2 - tz;
887 rinvsq = rinv43*rinv43;
889 vcoul = qq*(rinv43+krsq-crf);
891 fscal = (qq*(rinv43-2.0*krsq))*rinvsq;
893 if(force_cap>0 && (fabs(fscal)> force_cap)){
894 fscal=force_cap*fscal/fabs(fscal);
902 faction[j3+6] = fjx3 - tx;
903 faction[j3+7] = fjy3 - ty;
904 faction[j3+8] = fjz3 - tz;
906 rinvsq = rinv44*rinv44;
908 vcoul = qq*(rinv44+krsq-crf);
910 fscal = (qq*(rinv44-2.0*krsq))*rinvsq;
912 if(force_cap>0 && (fabs(fscal)> force_cap)){
913 fscal=force_cap*fscal/fabs(fscal);
921 faction[j3+9] = fjx4 - tx;
922 faction[j3+10] = fjy4 - ty;
923 faction[j3+11] = fjz4 - tz;
926 faction[ii3+0] = faction[ii3+0] + fix1;
927 faction[ii3+1] = faction[ii3+1] + fiy1;
928 faction[ii3+2] = faction[ii3+2] + fiz1;
929 faction[ii3+3] = faction[ii3+3] + fix2;
930 faction[ii3+4] = faction[ii3+4] + fiy2;
931 faction[ii3+5] = faction[ii3+5] + fiz2;
932 faction[ii3+6] = faction[ii3+6] + fix3;
933 faction[ii3+7] = faction[ii3+7] + fiy3;
934 faction[ii3+8] = faction[ii3+8] + fiz3;
935 faction[ii3+9] = faction[ii3+9] + fix4;
936 faction[ii3+10] = faction[ii3+10] + fiy4;
937 faction[ii3+11] = faction[ii3+11] + fiz4;
938 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
939 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
940 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
942 Vc[ggid] = Vc[ggid] + vctot;
943 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
944 ninner = ninner + nj1 - nj0;
947 nouter = nouter + nn1 - nn0;