2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/*
 * Thresholds compared against weight_product (weight_cg1*weight_cg2) in the
 * inner loops of both kernels in this file.
 */
#define ALMOST_ZERO 1e-30
/*
 * The replacement list is parenthesized so the macro expands as a single
 * operand in any expression; unparenthesized, `x*ALMOST_ONE` would parse as
 * `(x*1)-(1e-30)`.
 * NOTE(review): with IEEE-754 double arithmetic 1-(1e-30) rounds to exactly
 * 1.0, so `weight_product >= ALMOST_ONE` is effectively `>= 1.0` -- confirm
 * that this is the intended threshold.
 */
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel114_adress.h"
32 * Gromacs nonbonded kernel nb_kernel114_adress_cg
33 * Coulomb interaction: Normal Coulomb
34 * VdW interaction: Lennard-Jones
35 * water optimization: pairs of TIP4P interactions
36 * Calculate forces: yes
/*
 * nb_kernel114_adress_cg (only part of the body is visible here).
 *
 * Pair kernel between an i molecule and j molecules that each occupy four
 * consecutive sites (12 coordinates) in pos/faction. The visible code
 *   - evaluates a Lennard-Jones term (c12*rinvsix^2 minus Vvdw6) whose force
 *     is applied to j site 1,
 *   - evaluates a Coulomb force scalar fscal = vcoul*rinvsq for each of the
 *     nine pairs formed by sites 2-4 of i and sites 2-4 of j,
 *   - accumulates forces into faction and fshift and energies into
 *     Vc[ggid] and Vvdw[ggid].
 * In this variant hybscal is 1.0 - weight_product.
 */
38 void nb_kernel114_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Shifted coordinates of i sites 1-4 and the forces accumulated on them. */
84 real ix1,iy1,iz1,fix1,fiy1,fiz1;
85 real ix2,iy2,iz2,fix2,fiy2,fiz2;
86 real ix3,iy3,iz3,fix3,fiy3,fiz3;
87 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* Coordinates of j sites 2-4 and local copies of their forces. */
89 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
90 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
91 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
/*
 * Per-pair displacement, squared distance and (sites 2-4 only) inverse
 * distance. The second digit matches the j site whose force is updated
 * after that pair is processed; pair 11 has no rinv variable.
 */
92 real dx11,dy11,dz11,rsq11;
93 real dx22,dy22,dz22,rsq22,rinv22;
94 real dx23,dy23,dz23,rsq23,rinv23;
95 real dx24,dy24,dz24,rsq24,rinv24;
96 real dx32,dy32,dz32,rsq32,rinv32;
97 real dx33,dy33,dz33,rsq33,rinv33;
98 real dx34,dy34,dz34,rsq34,rinv34;
99 real dx42,dy42,dz42,rsq42,rinv42;
100 real dx43,dy43,dz43,rsq43,rinv43;
101 real dx44,dy44,dz44,rsq44,rinv44;
102 real qH,qM,qqMM,qqMH,qqHH;
/* Weights of the two molecules and their product, used to pick a branch. */
104 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are copied into locals. */
109 nthreads = *p_nthreads;
113 tabscale = *p_tabscale;
/*
 * 2*(ntype+1)*t equals 2*(ntype*t + t); presumably the offset of the
 * (t,t) entry in a table holding two values per type pair -- TODO confirm.
 */
120 tj = 2*(ntype+1)*type[ii];
/* c12 is the second of the two values stored at that offset. */
122 c12 = vdwparam[tj+1];
/* With GMX_THREAD_SHM_FDECOMP the outer range is chosen while holding mtx. */
129 #ifdef GMX_THREAD_SHM_FDECOMP
130 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/*
 * End of this chunk: a 1/(2*nthreads) share of the nri-nn0 remaining outer
 * entries, plus 10.
 */
132 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
134 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0..nn1-1. */
141 for(n=nn0; (n<nn1); n++)
/* Shift vector components for this entry. */
145 shY = shiftvec[is3+1];
146 shZ = shiftvec[is3+2];
/* Load the four i sites (12 consecutive reals at ii3) and add the shift. */
151 ix1 = shX + pos[ii3+0];
152 iy1 = shY + pos[ii3+1];
153 iz1 = shZ + pos[ii3+2];
154 ix2 = shX + pos[ii3+3];
155 iy2 = shY + pos[ii3+4];
156 iz2 = shZ + pos[ii3+5];
157 ix3 = shX + pos[ii3+6];
158 iy3 = shY + pos[ii3+7];
159 iz3 = shZ + pos[ii3+8];
160 ix4 = shX + pos[ii3+9];
161 iy4 = shY + pos[ii3+10];
162 iz4 = shZ + pos[ii3+11];
/* Inner loop over neighbour-list slots nj0..nj1-1 of this entry. */
179 for(k=nj0; (k<nj1); k++)
/* Weight of the j molecule, looked up by its index jnr. */
182 weight_cg2 = wf[jnr];
183 weight_product = weight_cg1*weight_cg2;
/*
 * Three-way branch on the weight product. According to the original
 * comment below, the >= ALMOST_ONE case is the one skipped in this variant.
 */
184 if (weight_product < ALMOST_ZERO) {
187 else if (weight_product >= ALMOST_ONE)
189 /* force is zero, skip this molecule */
/* Intermediate weights: scaling factor is the complement of the product. */
194 hybscal = 1.0 - weight_product;
/* Squared distances for pair 11 and the nine pairs among sites 2-4. */
212 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
216 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
220 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
224 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
228 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
232 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
236 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
240 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
244 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
248 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
/* Inverse distances via the library sqrt. */
250 rinv22 = 1.0/sqrt(rsq22);
251 rinv23 = 1.0/sqrt(rsq23);
252 rinv24 = 1.0/sqrt(rsq24);
253 rinv32 = 1.0/sqrt(rsq32);
254 rinv33 = 1.0/sqrt(rsq33);
255 rinv34 = 1.0/sqrt(rsq34);
256 rinv42 = 1.0/sqrt(rsq42);
257 rinv43 = 1.0/sqrt(rsq43);
258 rinv44 = 1.0/sqrt(rsq44);
/*
 * Lennard-Jones term. rinvsq is set on a line not shown here (presumably
 * from rsq11 -- TODO confirm).
 */
259 rinvsix = rinvsq*rinvsq*rinvsq;
261 Vvdw12 = c12*rinvsix*rinvsix;
262 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
263 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
/* Subtract this pair's force components from j site 1, directly in faction. */
271 faction[j3+0] = faction[j3+0] - tx;
272 faction[j3+1] = faction[j3+1] - ty;
273 faction[j3+2] = faction[j3+2] - tz;
/* Pair 22: Coulomb force scalar from vcoul and 1/r^2. */
275 rinvsq = rinv22*rinv22;
278 fscal = (vcoul)*rinvsq;
/*
 * First update of j site 2: load its force from faction into a local; it is
 * written back after pair 42.
 */
286 fjx2 = faction[j3+3] - tx;
287 fjy2 = faction[j3+4] - ty;
288 fjz2 = faction[j3+5] - tz;
/* Pair 23. */
290 rinvsq = rinv23*rinv23;
293 fscal = (vcoul)*rinvsq;
/* First update of j site 3; written back after pair 43. */
301 fjx3 = faction[j3+6] - tx;
302 fjy3 = faction[j3+7] - ty;
303 fjz3 = faction[j3+8] - tz;
/* Pair 24. */
305 rinvsq = rinv24*rinv24;
308 fscal = (vcoul)*rinvsq;
/* First update of j site 4; written back after pair 44. */
316 fjx4 = faction[j3+9] - tx;
317 fjy4 = faction[j3+10] - ty;
318 fjz4 = faction[j3+11] - tz;
/* Pair 32. */
320 rinvsq = rinv32*rinv32;
323 fscal = (vcoul)*rinvsq;
/* Pair 33. */
335 rinvsq = rinv33*rinv33;
338 fscal = (vcoul)*rinvsq;
/* Pair 34. */
350 rinvsq = rinv34*rinv34;
353 fscal = (vcoul)*rinvsq;
/* Pair 42. */
365 rinvsq = rinv42*rinv42;
368 fscal = (vcoul)*rinvsq;
/* Last pair touching j site 2: store its accumulated force back. */
376 faction[j3+3] = fjx2 - tx;
377 faction[j3+4] = fjy2 - ty;
378 faction[j3+5] = fjz2 - tz;
/* Pair 43. */
380 rinvsq = rinv43*rinv43;
383 fscal = (vcoul)*rinvsq;
/* Last pair touching j site 3: store its accumulated force back. */
391 faction[j3+6] = fjx3 - tx;
392 faction[j3+7] = fjy3 - ty;
393 faction[j3+8] = fjz3 - tz;
/* Pair 44. */
395 rinvsq = rinv44*rinv44;
398 fscal = (vcoul)*rinvsq;
/* Last pair touching j site 4: store its accumulated force back. */
406 faction[j3+9] = fjx4 - tx;
407 faction[j3+10] = fjy4 - ty;
408 faction[j3+11] = fjz4 - tz;
/* Add the forces accumulated on the four i sites to faction. */
411 faction[ii3+0] = faction[ii3+0] + fix1;
412 faction[ii3+1] = faction[ii3+1] + fiy1;
413 faction[ii3+2] = faction[ii3+2] + fiz1;
414 faction[ii3+3] = faction[ii3+3] + fix2;
415 faction[ii3+4] = faction[ii3+4] + fiy2;
416 faction[ii3+5] = faction[ii3+5] + fiz2;
417 faction[ii3+6] = faction[ii3+6] + fix3;
418 faction[ii3+7] = faction[ii3+7] + fiy3;
419 faction[ii3+8] = faction[ii3+8] + fiz3;
420 faction[ii3+9] = faction[ii3+9] + fix4;
421 faction[ii3+10] = faction[ii3+10] + fiy4;
422 faction[ii3+11] = faction[ii3+11] + fiz4;
/* The summed i-molecule force also goes into the shift-force entry at is3. */
423 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
424 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
425 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* Add this entry's Coulomb and VdW energies to the slots selected by ggid. */
427 Vc[ggid] = Vc[ggid] + vctot;
428 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Running count of inner iterations. */
429 ninner = ninner + nj1 - nj0;
/* Running count of outer iterations. */
432 nouter = nouter + nn1 - nn0;
445 * Gromacs nonbonded kernel nb_kernel114_adress_ex
446 * Coulomb interaction: Normal Coulomb
447 * VdW interaction: Lennard-Jones
448 * water optimization: pairs of TIP4P interactions
449 * Calculate forces: yes
/*
 * nb_kernel114_adress_ex (only part of the body is visible here).
 *
 * Same visible structure as nb_kernel114_adress_cg: a Lennard-Jones term
 * applied to j site 1, a Coulomb force scalar fscal = vcoul*rinvsq for the
 * nine pairs among sites 2-4, and accumulation into faction, fshift,
 * Vc[ggid] and Vvdw[ggid]. Visible differences in this variant:
 *   - hybscal is weight_product itself (not its complement),
 *   - the original "skip this molecule" comment follows the
 *     weight_product < ALMOST_ZERO test,
 *   - every fscal is limited in magnitude by force_cap when force_cap > 0.
 */
451 void nb_kernel114_adress_ex(
485 int nri,ntype,nthreads;
486 real facel,krf,crf,tabscale,gbtabscale;
487 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
488 int nn0,nn1,nouter,ninner;
/* Shifted coordinates of i sites 1-4 and the forces accumulated on them. */
497 real ix1,iy1,iz1,fix1,fiy1,fiz1;
498 real ix2,iy2,iz2,fix2,fiy2,fiz2;
499 real ix3,iy3,iz3,fix3,fiy3,fiz3;
500 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* Coordinates of j sites 2-4 and local copies of their forces. */
502 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
503 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
504 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
/*
 * Per-pair displacement, squared distance and (sites 2-4 only) inverse
 * distance. The second digit matches the j site whose force is updated
 * after that pair is processed; pair 11 has no rinv variable.
 */
505 real dx11,dy11,dz11,rsq11;
506 real dx22,dy22,dz22,rsq22,rinv22;
507 real dx23,dy23,dz23,rsq23,rinv23;
508 real dx24,dy24,dz24,rsq24,rinv24;
509 real dx32,dy32,dz32,rsq32,rinv32;
510 real dx33,dy33,dz33,rsq33,rinv33;
511 real dx34,dy34,dz34,rsq34,rinv34;
512 real dx42,dy42,dz42,rsq42,rinv42;
513 real dx43,dy43,dz43,rsq43,rinv43;
514 real dx44,dy44,dz44,rsq44,rinv44;
515 real qH,qM,qqMM,qqMH,qqHH;
/* Weights of the two molecules and their product, used to pick a branch. */
517 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are copied into locals. */
522 nthreads = *p_nthreads;
526 tabscale = *p_tabscale;
/*
 * 2*(ntype+1)*t equals 2*(ntype*t + t); presumably the offset of the
 * (t,t) entry in a table holding two values per type pair -- TODO confirm.
 */
533 tj = 2*(ntype+1)*type[ii];
/* c12 is the second of the two values stored at that offset. */
535 c12 = vdwparam[tj+1];
/* With GMX_THREAD_SHM_FDECOMP the outer range is chosen while holding mtx. */
542 #ifdef GMX_THREAD_SHM_FDECOMP
543 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/*
 * End of this chunk: a 1/(2*nthreads) share of the nri-nn0 remaining outer
 * entries, plus 10.
 */
545 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
547 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0..nn1-1. */
554 for(n=nn0; (n<nn1); n++)
/* Shift vector components for this entry. */
558 shY = shiftvec[is3+1];
559 shZ = shiftvec[is3+2];
/* Load the four i sites (12 consecutive reals at ii3) and add the shift. */
564 ix1 = shX + pos[ii3+0];
565 iy1 = shY + pos[ii3+1];
566 iz1 = shZ + pos[ii3+2];
567 ix2 = shX + pos[ii3+3];
568 iy2 = shY + pos[ii3+4];
569 iz2 = shZ + pos[ii3+5];
570 ix3 = shX + pos[ii3+6];
571 iy3 = shY + pos[ii3+7];
572 iz3 = shZ + pos[ii3+8];
573 ix4 = shX + pos[ii3+9];
574 iy4 = shY + pos[ii3+10];
575 iz4 = shZ + pos[ii3+11];
/* Inner loop over neighbour-list slots nj0..nj1-1 of this entry. */
592 for(k=nj0; (k<nj1); k++)
/* Weight of the j molecule, looked up by its index jnr. */
595 weight_cg2 = wf[jnr];
596 weight_product = weight_cg1*weight_cg2;
/*
 * Three-way branch on the weight product. In this variant the original
 * comment marks the < ALMOST_ZERO case as the one that is skipped.
 */
597 if (weight_product < ALMOST_ZERO) {
598 /* force is zero, skip this molecule */
601 else if (weight_product >= ALMOST_ONE)
/* Intermediate weights: scaling factor is the product itself. */
607 hybscal = weight_product;
/* Squared distances for pair 11 and the nine pairs among sites 2-4. */
625 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
629 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
633 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
637 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
641 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
645 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
649 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
653 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
657 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
661 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
/* Inverse distances via the library sqrt. */
663 rinv22 = 1.0/sqrt(rsq22);
664 rinv23 = 1.0/sqrt(rsq23);
665 rinv24 = 1.0/sqrt(rsq24);
666 rinv32 = 1.0/sqrt(rsq32);
667 rinv33 = 1.0/sqrt(rsq33);
668 rinv34 = 1.0/sqrt(rsq34);
669 rinv42 = 1.0/sqrt(rsq42);
670 rinv43 = 1.0/sqrt(rsq43);
671 rinv44 = 1.0/sqrt(rsq44);
/*
 * Lennard-Jones term. rinvsq is set on a line not shown here (presumably
 * from rsq11 -- TODO confirm).
 */
672 rinvsix = rinvsq*rinvsq*rinvsq;
674 Vvdw12 = c12*rinvsix*rinvsix;
675 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
676 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
/*
 * Force capping, repeated after every fscal below: when force_cap is
 * positive and |fscal| exceeds it, fscal is replaced by force_cap with the
 * sign of fscal (fscal/fabs(fscal) is +1 or -1). A non-positive force_cap
 * leaves fscal untouched.
 */
678 if(force_cap>0 && (fabs(fscal)> force_cap)){
679 fscal=force_cap*fscal/fabs(fscal);
/* Subtract this pair's force components from j site 1, directly in faction. */
687 faction[j3+0] = faction[j3+0] - tx;
688 faction[j3+1] = faction[j3+1] - ty;
689 faction[j3+2] = faction[j3+2] - tz;
/* Pair 22: Coulomb force scalar from vcoul and 1/r^2, then capped. */
691 rinvsq = rinv22*rinv22;
694 fscal = (vcoul)*rinvsq;
696 if(force_cap>0 && (fabs(fscal)> force_cap)){
697 fscal=force_cap*fscal/fabs(fscal);
/*
 * First update of j site 2: load its force from faction into a local; it is
 * written back after pair 42.
 */
705 fjx2 = faction[j3+3] - tx;
706 fjy2 = faction[j3+4] - ty;
707 fjz2 = faction[j3+5] - tz;
/* Pair 23. */
709 rinvsq = rinv23*rinv23;
712 fscal = (vcoul)*rinvsq;
714 if(force_cap>0 && (fabs(fscal)> force_cap)){
715 fscal=force_cap*fscal/fabs(fscal);
/* First update of j site 3; written back after pair 43. */
723 fjx3 = faction[j3+6] - tx;
724 fjy3 = faction[j3+7] - ty;
725 fjz3 = faction[j3+8] - tz;
/* Pair 24. */
727 rinvsq = rinv24*rinv24;
730 fscal = (vcoul)*rinvsq;
732 if(force_cap>0 && (fabs(fscal)> force_cap)){
733 fscal=force_cap*fscal/fabs(fscal);
/* First update of j site 4; written back after pair 44. */
741 fjx4 = faction[j3+9] - tx;
742 fjy4 = faction[j3+10] - ty;
743 fjz4 = faction[j3+11] - tz;
/* Pair 32. */
745 rinvsq = rinv32*rinv32;
748 fscal = (vcoul)*rinvsq;
750 if(force_cap>0 && (fabs(fscal)> force_cap)){
751 fscal=force_cap*fscal/fabs(fscal);
/* Pair 33. */
763 rinvsq = rinv33*rinv33;
766 fscal = (vcoul)*rinvsq;
768 if(force_cap>0 && (fabs(fscal)> force_cap)){
769 fscal=force_cap*fscal/fabs(fscal);
/* Pair 34. */
781 rinvsq = rinv34*rinv34;
784 fscal = (vcoul)*rinvsq;
786 if(force_cap>0 && (fabs(fscal)> force_cap)){
787 fscal=force_cap*fscal/fabs(fscal);
/* Pair 42. */
799 rinvsq = rinv42*rinv42;
802 fscal = (vcoul)*rinvsq;
804 if(force_cap>0 && (fabs(fscal)> force_cap)){
805 fscal=force_cap*fscal/fabs(fscal);
/* Last pair touching j site 2: store its accumulated force back. */
813 faction[j3+3] = fjx2 - tx;
814 faction[j3+4] = fjy2 - ty;
815 faction[j3+5] = fjz2 - tz;
/* Pair 43. */
817 rinvsq = rinv43*rinv43;
820 fscal = (vcoul)*rinvsq;
822 if(force_cap>0 && (fabs(fscal)> force_cap)){
823 fscal=force_cap*fscal/fabs(fscal);
/* Last pair touching j site 3: store its accumulated force back. */
831 faction[j3+6] = fjx3 - tx;
832 faction[j3+7] = fjy3 - ty;
833 faction[j3+8] = fjz3 - tz;
/* Pair 44. */
835 rinvsq = rinv44*rinv44;
838 fscal = (vcoul)*rinvsq;
840 if(force_cap>0 && (fabs(fscal)> force_cap)){
841 fscal=force_cap*fscal/fabs(fscal);
/* Last pair touching j site 4: store its accumulated force back. */
849 faction[j3+9] = fjx4 - tx;
850 faction[j3+10] = fjy4 - ty;
851 faction[j3+11] = fjz4 - tz;
/* Add the forces accumulated on the four i sites to faction. */
854 faction[ii3+0] = faction[ii3+0] + fix1;
855 faction[ii3+1] = faction[ii3+1] + fiy1;
856 faction[ii3+2] = faction[ii3+2] + fiz1;
857 faction[ii3+3] = faction[ii3+3] + fix2;
858 faction[ii3+4] = faction[ii3+4] + fiy2;
859 faction[ii3+5] = faction[ii3+5] + fiz2;
860 faction[ii3+6] = faction[ii3+6] + fix3;
861 faction[ii3+7] = faction[ii3+7] + fiy3;
862 faction[ii3+8] = faction[ii3+8] + fiz3;
863 faction[ii3+9] = faction[ii3+9] + fix4;
864 faction[ii3+10] = faction[ii3+10] + fiy4;
865 faction[ii3+11] = faction[ii3+11] + fiz4;
/* The summed i-molecule force also goes into the shift-force entry at is3. */
866 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
867 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
868 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* Add this entry's Coulomb and VdW energies to the slots selected by ggid. */
870 Vc[ggid] = Vc[ggid] + vctot;
871 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Running count of inner iterations. */
872 ninner = ninner + nj1 - nj0;
/* Running count of outer iterations. */
875 nouter = nouter + nn1 - nn0;