2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Lower threshold for weight_product: the kernels test weight_product < ALMOST_ZERO. */
#define ALMOST_ZERO 1e-30
/*
 * Upper threshold for weight_product: the kernels test weight_product >= ALMOST_ONE.
 * The replacement list is parenthesized so the macro always expands as a single
 * operand (e.g. ALMOST_ONE * x or -ALMOST_ONE), not as "1 - (1e-30) * x".
 * Note: in IEEE-754 double precision 1 - 1e-30 rounds to exactly 1.0.
 */
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel124_adress.h"
32 * Gromacs nonbonded kernel nb_kernel124_adress_cg
33 * Coulomb interaction: Normal Coulomb
34 * VdW interaction: Buckingham
35 * water optimization: pairs of TIP4P interactions
36 * Calculate forces: yes
38 void nb_kernel124_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
84 real ix1,iy1,iz1,fix1,fiy1,fiz1;
85 real ix2,iy2,iz2,fix2,fiy2,fiz2;
86 real ix3,iy3,iz3,fix3,fiy3,fiz3;
87 real ix4,iy4,iz4,fix4,fiy4,fiz4;
89 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
90 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
91 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
92 real dx11,dy11,dz11,rsq11,rinv11;
93 real dx22,dy22,dz22,rsq22,rinv22;
94 real dx23,dy23,dz23,rsq23,rinv23;
95 real dx24,dy24,dz24,rsq24,rinv24;
96 real dx32,dy32,dz32,rsq32,rinv32;
97 real dx33,dy33,dz33,rsq33,rinv33;
98 real dx34,dy34,dz34,rsq34,rinv34;
99 real dx42,dy42,dz42,rsq42,rinv42;
100 real dx43,dy43,dz43,rsq43,rinv43;
101 real dx44,dy44,dz44,rsq44,rinv44;
102 real qH,qM,qqMM,qqMH,qqHH;
104 real weight_cg1, weight_cg2, weight_product;
109 nthreads = *p_nthreads;
113 tabscale = *p_tabscale;
120 tj = 3*(ntype+1)*type[ii];
122 cexp1 = vdwparam[tj+1];
123 cexp2 = vdwparam[tj+2];
130 #ifdef GMX_THREAD_SHM_FDECOMP
131 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
133 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
135 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
142 for(n=nn0; (n<nn1); n++)
146 shY = shiftvec[is3+1];
147 shZ = shiftvec[is3+2];
152 ix1 = shX + pos[ii3+0];
153 iy1 = shY + pos[ii3+1];
154 iz1 = shZ + pos[ii3+2];
155 ix2 = shX + pos[ii3+3];
156 iy2 = shY + pos[ii3+4];
157 iz2 = shZ + pos[ii3+5];
158 ix3 = shX + pos[ii3+6];
159 iy3 = shY + pos[ii3+7];
160 iz3 = shZ + pos[ii3+8];
161 ix4 = shX + pos[ii3+9];
162 iy4 = shY + pos[ii3+10];
163 iz4 = shZ + pos[ii3+11];
180 for(k=nj0; (k<nj1); k++)
183 weight_cg2 = wf[jnr];
184 weight_product = weight_cg1*weight_cg2;
185 if (weight_product < ALMOST_ZERO) {
188 else if (weight_product >= ALMOST_ONE)
190 /* force is zero, skip this molecule */
195 hybscal = 1.0 - weight_product;
213 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
217 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
221 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
225 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
229 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
233 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
237 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
241 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
245 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
249 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
250 rinv11 = 1.0/sqrt(rsq11);
251 rinv22 = 1.0/sqrt(rsq22);
252 rinv23 = 1.0/sqrt(rsq23);
253 rinv24 = 1.0/sqrt(rsq24);
254 rinv32 = 1.0/sqrt(rsq32);
255 rinv33 = 1.0/sqrt(rsq33);
256 rinv34 = 1.0/sqrt(rsq34);
257 rinv42 = 1.0/sqrt(rsq42);
258 rinv43 = 1.0/sqrt(rsq43);
259 rinv44 = 1.0/sqrt(rsq44);
260 rinvsq = rinv11*rinv11;
261 rinvsix = rinvsq*rinvsq*rinvsq;
263 br = cexp2*rsq11*rinv11;
264 Vvdwexp = cexp1*exp(-br);
265 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
266 fscal = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
274 faction[j3+0] = faction[j3+0] - tx;
275 faction[j3+1] = faction[j3+1] - ty;
276 faction[j3+2] = faction[j3+2] - tz;
278 rinvsq = rinv22*rinv22;
281 fscal = (vcoul)*rinvsq;
289 fjx2 = faction[j3+3] - tx;
290 fjy2 = faction[j3+4] - ty;
291 fjz2 = faction[j3+5] - tz;
293 rinvsq = rinv23*rinv23;
296 fscal = (vcoul)*rinvsq;
304 fjx3 = faction[j3+6] - tx;
305 fjy3 = faction[j3+7] - ty;
306 fjz3 = faction[j3+8] - tz;
308 rinvsq = rinv24*rinv24;
311 fscal = (vcoul)*rinvsq;
319 fjx4 = faction[j3+9] - tx;
320 fjy4 = faction[j3+10] - ty;
321 fjz4 = faction[j3+11] - tz;
323 rinvsq = rinv32*rinv32;
326 fscal = (vcoul)*rinvsq;
338 rinvsq = rinv33*rinv33;
341 fscal = (vcoul)*rinvsq;
353 rinvsq = rinv34*rinv34;
356 fscal = (vcoul)*rinvsq;
368 rinvsq = rinv42*rinv42;
371 fscal = (vcoul)*rinvsq;
379 faction[j3+3] = fjx2 - tx;
380 faction[j3+4] = fjy2 - ty;
381 faction[j3+5] = fjz2 - tz;
383 rinvsq = rinv43*rinv43;
386 fscal = (vcoul)*rinvsq;
394 faction[j3+6] = fjx3 - tx;
395 faction[j3+7] = fjy3 - ty;
396 faction[j3+8] = fjz3 - tz;
398 rinvsq = rinv44*rinv44;
401 fscal = (vcoul)*rinvsq;
409 faction[j3+9] = fjx4 - tx;
410 faction[j3+10] = fjy4 - ty;
411 faction[j3+11] = fjz4 - tz;
414 faction[ii3+0] = faction[ii3+0] + fix1;
415 faction[ii3+1] = faction[ii3+1] + fiy1;
416 faction[ii3+2] = faction[ii3+2] + fiz1;
417 faction[ii3+3] = faction[ii3+3] + fix2;
418 faction[ii3+4] = faction[ii3+4] + fiy2;
419 faction[ii3+5] = faction[ii3+5] + fiz2;
420 faction[ii3+6] = faction[ii3+6] + fix3;
421 faction[ii3+7] = faction[ii3+7] + fiy3;
422 faction[ii3+8] = faction[ii3+8] + fiz3;
423 faction[ii3+9] = faction[ii3+9] + fix4;
424 faction[ii3+10] = faction[ii3+10] + fiy4;
425 faction[ii3+11] = faction[ii3+11] + fiz4;
426 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
427 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
428 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
430 Vc[ggid] = Vc[ggid] + vctot;
431 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
432 ninner = ninner + nj1 - nj0;
435 nouter = nouter + nn1 - nn0;
448 * Gromacs nonbonded kernel nb_kernel124_adress_ex
449 * Coulomb interaction: Normal Coulomb
450 * VdW interaction: Buckingham
451 * water optimization: pairs of TIP4P interactions
452 * Calculate forces: yes
454 void nb_kernel124_adress_ex(
488 int nri,ntype,nthreads;
489 real facel,krf,crf,tabscale,gbtabscale;
490 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
491 int nn0,nn1,nouter,ninner;
500 real ix1,iy1,iz1,fix1,fiy1,fiz1;
501 real ix2,iy2,iz2,fix2,fiy2,fiz2;
502 real ix3,iy3,iz3,fix3,fiy3,fiz3;
503 real ix4,iy4,iz4,fix4,fiy4,fiz4;
505 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
506 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
507 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
508 real dx11,dy11,dz11,rsq11,rinv11;
509 real dx22,dy22,dz22,rsq22,rinv22;
510 real dx23,dy23,dz23,rsq23,rinv23;
511 real dx24,dy24,dz24,rsq24,rinv24;
512 real dx32,dy32,dz32,rsq32,rinv32;
513 real dx33,dy33,dz33,rsq33,rinv33;
514 real dx34,dy34,dz34,rsq34,rinv34;
515 real dx42,dy42,dz42,rsq42,rinv42;
516 real dx43,dy43,dz43,rsq43,rinv43;
517 real dx44,dy44,dz44,rsq44,rinv44;
518 real qH,qM,qqMM,qqMH,qqHH;
520 real weight_cg1, weight_cg2, weight_product;
525 nthreads = *p_nthreads;
529 tabscale = *p_tabscale;
536 tj = 3*(ntype+1)*type[ii];
538 cexp1 = vdwparam[tj+1];
539 cexp2 = vdwparam[tj+2];
546 #ifdef GMX_THREAD_SHM_FDECOMP
547 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
549 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
551 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
558 for(n=nn0; (n<nn1); n++)
562 shY = shiftvec[is3+1];
563 shZ = shiftvec[is3+2];
568 ix1 = shX + pos[ii3+0];
569 iy1 = shY + pos[ii3+1];
570 iz1 = shZ + pos[ii3+2];
571 ix2 = shX + pos[ii3+3];
572 iy2 = shY + pos[ii3+4];
573 iz2 = shZ + pos[ii3+5];
574 ix3 = shX + pos[ii3+6];
575 iy3 = shY + pos[ii3+7];
576 iz3 = shZ + pos[ii3+8];
577 ix4 = shX + pos[ii3+9];
578 iy4 = shY + pos[ii3+10];
579 iz4 = shZ + pos[ii3+11];
596 for(k=nj0; (k<nj1); k++)
599 weight_cg2 = wf[jnr];
600 weight_product = weight_cg1*weight_cg2;
601 if (weight_product < ALMOST_ZERO) {
602 /* force is zero, skip this molecule */
605 else if (weight_product >= ALMOST_ONE)
611 hybscal = weight_product;
629 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
633 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
637 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
641 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
645 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
649 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
653 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
657 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
661 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
665 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
666 rinv11 = 1.0/sqrt(rsq11);
667 rinv22 = 1.0/sqrt(rsq22);
668 rinv23 = 1.0/sqrt(rsq23);
669 rinv24 = 1.0/sqrt(rsq24);
670 rinv32 = 1.0/sqrt(rsq32);
671 rinv33 = 1.0/sqrt(rsq33);
672 rinv34 = 1.0/sqrt(rsq34);
673 rinv42 = 1.0/sqrt(rsq42);
674 rinv43 = 1.0/sqrt(rsq43);
675 rinv44 = 1.0/sqrt(rsq44);
676 rinvsq = rinv11*rinv11;
677 rinvsix = rinvsq*rinvsq*rinvsq;
679 br = cexp2*rsq11*rinv11;
680 Vvdwexp = cexp1*exp(-br);
681 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
682 fscal = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
684 if(force_cap>0 && (fabs(fscal)> force_cap)){
685 fscal=force_cap*fscal/fabs(fscal);
693 faction[j3+0] = faction[j3+0] - tx;
694 faction[j3+1] = faction[j3+1] - ty;
695 faction[j3+2] = faction[j3+2] - tz;
697 rinvsq = rinv22*rinv22;
700 fscal = (vcoul)*rinvsq;
702 if(force_cap>0 && (fabs(fscal)> force_cap)){
703 fscal=force_cap*fscal/fabs(fscal);
711 fjx2 = faction[j3+3] - tx;
712 fjy2 = faction[j3+4] - ty;
713 fjz2 = faction[j3+5] - tz;
715 rinvsq = rinv23*rinv23;
718 fscal = (vcoul)*rinvsq;
720 if(force_cap>0 && (fabs(fscal)> force_cap)){
721 fscal=force_cap*fscal/fabs(fscal);
729 fjx3 = faction[j3+6] - tx;
730 fjy3 = faction[j3+7] - ty;
731 fjz3 = faction[j3+8] - tz;
733 rinvsq = rinv24*rinv24;
736 fscal = (vcoul)*rinvsq;
738 if(force_cap>0 && (fabs(fscal)> force_cap)){
739 fscal=force_cap*fscal/fabs(fscal);
747 fjx4 = faction[j3+9] - tx;
748 fjy4 = faction[j3+10] - ty;
749 fjz4 = faction[j3+11] - tz;
751 rinvsq = rinv32*rinv32;
754 fscal = (vcoul)*rinvsq;
756 if(force_cap>0 && (fabs(fscal)> force_cap)){
757 fscal=force_cap*fscal/fabs(fscal);
769 rinvsq = rinv33*rinv33;
772 fscal = (vcoul)*rinvsq;
774 if(force_cap>0 && (fabs(fscal)> force_cap)){
775 fscal=force_cap*fscal/fabs(fscal);
787 rinvsq = rinv34*rinv34;
790 fscal = (vcoul)*rinvsq;
792 if(force_cap>0 && (fabs(fscal)> force_cap)){
793 fscal=force_cap*fscal/fabs(fscal);
805 rinvsq = rinv42*rinv42;
808 fscal = (vcoul)*rinvsq;
810 if(force_cap>0 && (fabs(fscal)> force_cap)){
811 fscal=force_cap*fscal/fabs(fscal);
819 faction[j3+3] = fjx2 - tx;
820 faction[j3+4] = fjy2 - ty;
821 faction[j3+5] = fjz2 - tz;
823 rinvsq = rinv43*rinv43;
826 fscal = (vcoul)*rinvsq;
828 if(force_cap>0 && (fabs(fscal)> force_cap)){
829 fscal=force_cap*fscal/fabs(fscal);
837 faction[j3+6] = fjx3 - tx;
838 faction[j3+7] = fjy3 - ty;
839 faction[j3+8] = fjz3 - tz;
841 rinvsq = rinv44*rinv44;
844 fscal = (vcoul)*rinvsq;
846 if(force_cap>0 && (fabs(fscal)> force_cap)){
847 fscal=force_cap*fscal/fabs(fscal);
855 faction[j3+9] = fjx4 - tx;
856 faction[j3+10] = fjy4 - ty;
857 faction[j3+11] = fjz4 - tz;
860 faction[ii3+0] = faction[ii3+0] + fix1;
861 faction[ii3+1] = faction[ii3+1] + fiy1;
862 faction[ii3+2] = faction[ii3+2] + fiz1;
863 faction[ii3+3] = faction[ii3+3] + fix2;
864 faction[ii3+4] = faction[ii3+4] + fiy2;
865 faction[ii3+5] = faction[ii3+5] + fiz2;
866 faction[ii3+6] = faction[ii3+6] + fix3;
867 faction[ii3+7] = faction[ii3+7] + fiy3;
868 faction[ii3+8] = faction[ii3+8] + fiz3;
869 faction[ii3+9] = faction[ii3+9] + fix4;
870 faction[ii3+10] = faction[ii3+10] + fiy4;
871 faction[ii3+11] = faction[ii3+11] + fiz4;
872 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
873 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
874 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
876 Vc[ggid] = Vc[ggid] + vctot;
877 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
878 ninner = ninner + nj1 - nj0;
881 nouter = nouter + nn1 - nn0;