2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Thresholds compared against weight_product in the kernels of this file. */
#define ALMOST_ZERO 1e-30
/*
 * The replacement list is parenthesized so the macro stays a single operand
 * when it appears inside a larger expression (e.g. ALMOST_ONE*x).
 * NOTE(review): 1e-30 is far below double-precision resolution near 1, so in
 * IEEE-754 double arithmetic this value compares equal to 1.0 -- confirm that
 * this is the intended threshold.
 */
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel132_adress.h"
32 * Gromacs nonbonded kernel nb_kernel132_adress_cg
33 * Coulomb interaction: Normal Coulomb
34 * VdW interaction: Tabulated
35 * water optimization: pairs of SPC/TIP3P interactions
36 * Calculate forces: yes
/*
 * Kernel nb_kernel132_adress_cg.
 * NOTE(review): only an excerpt of this routine is visible here -- the
 * parameter list, braces and many intermediate statements are missing -- so
 * the comments below describe just the statements that are shown.
 */
38 void nb_kernel132_adress_cg(
/* Integer counters/indices and scalar factors used throughout the routine. */
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Geps, Heps2 and Fp feed the VFtab evaluations further down. */
85 real Y,F,Geps,Heps2,Fp,VV;
/* i-side positions (ix*,iy*,iz*) and force accumulators (fix*,fiy*,fiz*) for sites 1..3. */
88 real ix1,iy1,iz1,fix1,fiy1,fiz1;
89 real ix2,iy2,iz2,fix2,fiy2,fiz2;
90 real ix3,iy3,iz3,fix3,fiy3,fiz3;
/* j-side force accumulators fj* for sites 1..3; the j* coordinates are presumably filled on lines not shown. */
91 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
92 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
93 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* For every site pair ab: displacement components, squared distance rsqab and inverse distance rinvab. */
94 real dx11,dy11,dz11,rsq11,rinv11;
95 real dx12,dy12,dz12,rsq12,rinv12;
96 real dx13,dy13,dz13,rsq13,rinv13;
97 real dx21,dy21,dz21,rsq21,rinv21;
98 real dx22,dy22,dz22,rsq22,rinv22;
99 real dx23,dy23,dz23,rsq23,rinv23;
100 real dx31,dy31,dz31,rsq31,rinv31;
101 real dx32,dy32,dz32,rsq32,rinv32;
102 real dx33,dy33,dz33,rsq33,rinv33;
/* NOTE(review): presumably charges and charge products; they are not used in the visible lines. */
103 real qO,qH,qqOO,qqOH,qqHH;
/* Two weights and their product, which selects the branch taken in the inner loop. */
105 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are copied into locals. */
110 nthreads = *p_nthreads;
114 tabscale = *p_tabscale;
/* tj is an offset into vdwparam derived from type[ii]; c12 is read from slot tj+1. */
121 tj = 2*(ntype+1)*type[ii];
123 c12 = vdwparam[tj+1];
/*
 * When GMX_THREAD_SHM_FDECOMP is defined, the mutex mtx is held while the
 * upper bound nn1 of the next outer-loop chunk is computed from nn0, nri and
 * nthreads. The matching #else/#endif lines are not shown in this excerpt.
 */
130 #ifdef GMX_THREAD_SHM_FDECOMP
131 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
133 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
135 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 .. nn1-1. */
142 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector at offset is3 (shX is assigned on a line not shown). */
146 shY = shiftvec[is3+1];
147 shZ = shiftvec[is3+2];
/* Shifted positions of the three i sites: nine consecutive values starting at pos[ii3]. */
152 ix1 = shX + pos[ii3+0];
153 iy1 = shY + pos[ii3+1];
154 iz1 = shZ + pos[ii3+2];
155 ix2 = shX + pos[ii3+3];
156 iy2 = shY + pos[ii3+4];
157 iz2 = shZ + pos[ii3+5];
158 ix3 = shX + pos[ii3+6];
159 iy3 = shY + pos[ii3+7];
160 iz3 = shZ + pos[ii3+8];
/* Inner loop over entries nj0 .. nj1-1. */
174 for(k=nj0; (k<nj1); k++)
/* Combine weight_cg1 (set on a line not shown) with wf[jnr] and branch on the product. */
177 weight_cg2 = wf[jnr];
178 weight_product = weight_cg1*weight_cg2;
179 if (weight_product < ALMOST_ZERO) {
182 else if (weight_product >= ALMOST_ONE)
184 /* force is zero, skip this molecule */
/* Presumably the remaining (intermediate) branch: the scale factor is the complement of the weight product. */
189 hybscal = 1.0 - weight_product;
/* Squared distances for all nine site pairs. */
204 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
208 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
212 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
216 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
220 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
224 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
228 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
232 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
236 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances via 1/sqrt of the squared distances. */
237 rinv11 = 1.0/sqrt(rsq11);
238 rinv12 = 1.0/sqrt(rsq12);
239 rinv13 = 1.0/sqrt(rsq13);
240 rinv21 = 1.0/sqrt(rsq21);
241 rinv22 = 1.0/sqrt(rsq22);
242 rinv23 = 1.0/sqrt(rsq23);
243 rinv31 = 1.0/sqrt(rsq31);
244 rinv32 = 1.0/sqrt(rsq32);
245 rinv33 = 1.0/sqrt(rsq33);
/* Pair 1-1: the only pair whose fscal includes the table-derived terms fijD and fijR. */
247 rinvsq = rinv11*rinv11;
/* First table evaluation: entries nnn+2 and nnn+3 scaled by eps and eps2 (presumably the fijD term). */
258 Geps = eps*VFtab[nnn+2];
259 Heps2 = eps2*VFtab[nnn+3];
262 FF = Fp+Geps+2.0*Heps2;
/* Second table evaluation with the same form (presumably the fijR term; nnn is updated on a line not shown). */
268 Geps = eps*VFtab[nnn+2];
269 Heps2 = eps2*VFtab[nnn+3];
272 FF = Fp+Geps+2.0*Heps2;
/* Accumulate both VdW energy contributions. */
275 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
/* Scalar force factor: vcoul*rinvsq minus the tabulated part scaled by tabscale and rinv11. */
276 fscal = (vcoul)*rinvsq-((fijD+fijR)*tabscale)*rinv11;
/* Start the j-site 1 accumulator from faction[j3..j3+2] minus tx,ty,tz (computed on lines not shown). */
284 fjx1 = faction[j3+0] - tx;
285 fjy1 = faction[j3+1] - ty;
286 fjz1 = faction[j3+2] - tz;
/* Pair 1-2: fscal is vcoul*rinvsq only. */
288 rinvsq = rinv12*rinv12;
291 fscal = (vcoul)*rinvsq;
/* Start the j-site 2 accumulator from faction[j3+3..j3+5]. */
299 fjx2 = faction[j3+3] - tx;
300 fjy2 = faction[j3+4] - ty;
301 fjz2 = faction[j3+5] - tz;
/* Pair 1-3. */
303 rinvsq = rinv13*rinv13;
306 fscal = (vcoul)*rinvsq;
/* Start the j-site 3 accumulator from faction[j3+6..j3+8]. */
314 fjx3 = faction[j3+6] - tx;
315 fjy3 = faction[j3+7] - ty;
316 fjz3 = faction[j3+8] - tz;
/* Pair 2-1. */
318 rinvsq = rinv21*rinv21;
321 fscal = (vcoul)*rinvsq;
/* Pair 2-2. */
333 rinvsq = rinv22*rinv22;
336 fscal = (vcoul)*rinvsq;
/* Pair 2-3. */
348 rinvsq = rinv23*rinv23;
351 fscal = (vcoul)*rinvsq;
/* Pair 3-1. */
363 rinvsq = rinv31*rinv31;
366 fscal = (vcoul)*rinvsq;
/* Last visible pair involving j-site 1: write its accumulated force back to faction. */
374 faction[j3+0] = fjx1 - tx;
375 faction[j3+1] = fjy1 - ty;
376 faction[j3+2] = fjz1 - tz;
/* Pair 3-2. */
378 rinvsq = rinv32*rinv32;
381 fscal = (vcoul)*rinvsq;
/* Last visible pair involving j-site 2: write its accumulated force back. */
389 faction[j3+3] = fjx2 - tx;
390 faction[j3+4] = fjy2 - ty;
391 faction[j3+5] = fjz2 - tz;
/* Pair 3-3. */
393 rinvsq = rinv33*rinv33;
396 fscal = (vcoul)*rinvsq;
/* Last visible pair involving j-site 3: write its accumulated force back. */
404 faction[j3+6] = fjx3 - tx;
405 faction[j3+7] = fjy3 - ty;
406 faction[j3+8] = fjz3 - tz;
/* Add the i-site force accumulators to faction at ii3..ii3+8. */
409 faction[ii3+0] = faction[ii3+0] + fix1;
410 faction[ii3+1] = faction[ii3+1] + fiy1;
411 faction[ii3+2] = faction[ii3+2] + fiz1;
412 faction[ii3+3] = faction[ii3+3] + fix2;
413 faction[ii3+4] = faction[ii3+4] + fiy2;
414 faction[ii3+5] = faction[ii3+5] + fiz2;
415 faction[ii3+6] = faction[ii3+6] + fix3;
416 faction[ii3+7] = faction[ii3+7] + fiy3;
417 faction[ii3+8] = faction[ii3+8] + fiz3;
/* Add the summed force of the three i sites to fshift at is3..is3+2. */
418 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
419 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
420 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Add the energy totals to the Vc and Vvdw slots selected by ggid. */
422 Vc[ggid] = Vc[ggid] + vctot;
423 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Count the inner iterations just processed. */
424 ninner = ninner + nj1 - nj0;
/* Count the outer iterations covered by this chunk. */
427 nouter = nouter + nn1 - nn0;
440 * Gromacs nonbonded kernel nb_kernel132_adress_ex
441 * Coulomb interaction: Normal Coulomb
442 * VdW interaction: Tabulated
443 * water optimization: pairs of SPC/TIP3P interactions
444 * Calculate forces: yes
/*
 * Kernel nb_kernel132_adress_ex.
 * The visible statements differ from nb_kernel132_adress_cg in two ways:
 * hybscal is set to weight_product itself, and every fscal is clamped to
 * force_cap when force_cap is positive.
 * NOTE(review): only an excerpt of this routine is visible here -- the
 * parameter list, braces and many intermediate statements are missing -- so
 * the comments below describe just the statements that are shown.
 */
446 void nb_kernel132_adress_ex(
/* Integer counters/indices and scalar factors used throughout the routine. */
480 int nri,ntype,nthreads;
481 real facel,krf,crf,tabscale,gbtabscale;
482 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
483 int nn0,nn1,nouter,ninner;
/* Geps, Heps2 and Fp feed the VFtab evaluations further down. */
493 real Y,F,Geps,Heps2,Fp,VV;
/* i-side positions (ix*,iy*,iz*) and force accumulators (fix*,fiy*,fiz*) for sites 1..3. */
496 real ix1,iy1,iz1,fix1,fiy1,fiz1;
497 real ix2,iy2,iz2,fix2,fiy2,fiz2;
498 real ix3,iy3,iz3,fix3,fiy3,fiz3;
/* j-side force accumulators fj* for sites 1..3; the j* coordinates are presumably filled on lines not shown. */
499 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
500 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
501 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* For every site pair ab: displacement components, squared distance rsqab and inverse distance rinvab. */
502 real dx11,dy11,dz11,rsq11,rinv11;
503 real dx12,dy12,dz12,rsq12,rinv12;
504 real dx13,dy13,dz13,rsq13,rinv13;
505 real dx21,dy21,dz21,rsq21,rinv21;
506 real dx22,dy22,dz22,rsq22,rinv22;
507 real dx23,dy23,dz23,rsq23,rinv23;
508 real dx31,dy31,dz31,rsq31,rinv31;
509 real dx32,dy32,dz32,rsq32,rinv32;
510 real dx33,dy33,dz33,rsq33,rinv33;
/* NOTE(review): presumably charges and charge products; they are not used in the visible lines. */
511 real qO,qH,qqOO,qqOH,qqHH;
/* Two weights and their product, which selects the branch taken in the inner loop. */
513 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are copied into locals. */
518 nthreads = *p_nthreads;
522 tabscale = *p_tabscale;
/* tj is an offset into vdwparam derived from type[ii]; c12 is read from slot tj+1. */
529 tj = 2*(ntype+1)*type[ii];
531 c12 = vdwparam[tj+1];
/*
 * When GMX_THREAD_SHM_FDECOMP is defined, the mutex mtx is held while the
 * upper bound nn1 of the next outer-loop chunk is computed from nn0, nri and
 * nthreads. The matching #else/#endif lines are not shown in this excerpt.
 */
538 #ifdef GMX_THREAD_SHM_FDECOMP
539 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
541 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
543 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 .. nn1-1. */
550 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector at offset is3 (shX is assigned on a line not shown). */
554 shY = shiftvec[is3+1];
555 shZ = shiftvec[is3+2];
/* Shifted positions of the three i sites: nine consecutive values starting at pos[ii3]. */
560 ix1 = shX + pos[ii3+0];
561 iy1 = shY + pos[ii3+1];
562 iz1 = shZ + pos[ii3+2];
563 ix2 = shX + pos[ii3+3];
564 iy2 = shY + pos[ii3+4];
565 iz2 = shZ + pos[ii3+5];
566 ix3 = shX + pos[ii3+6];
567 iy3 = shY + pos[ii3+7];
568 iz3 = shZ + pos[ii3+8];
/* Inner loop over entries nj0 .. nj1-1. */
582 for(k=nj0; (k<nj1); k++)
/* Combine weight_cg1 (set on a line not shown) with wf[jnr] and branch on the product. */
585 weight_cg2 = wf[jnr];
586 weight_product = weight_cg1*weight_cg2;
587 if (weight_product < ALMOST_ZERO) {
588 /* force is zero, skip this molecule */
591 else if (weight_product >= ALMOST_ONE)
/* Presumably the remaining (intermediate) branch: the scale factor is the weight product itself. */
597 hybscal = weight_product;
/* Squared distances for all nine site pairs. */
612 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
616 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
620 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
624 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
628 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
632 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
636 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
640 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
644 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances via 1/sqrt of the squared distances. */
645 rinv11 = 1.0/sqrt(rsq11);
646 rinv12 = 1.0/sqrt(rsq12);
647 rinv13 = 1.0/sqrt(rsq13);
648 rinv21 = 1.0/sqrt(rsq21);
649 rinv22 = 1.0/sqrt(rsq22);
650 rinv23 = 1.0/sqrt(rsq23);
651 rinv31 = 1.0/sqrt(rsq31);
652 rinv32 = 1.0/sqrt(rsq32);
653 rinv33 = 1.0/sqrt(rsq33);
/* Pair 1-1: the only pair whose fscal includes the table-derived terms fijD and fijR. */
655 rinvsq = rinv11*rinv11;
/* First table evaluation: entries nnn+2 and nnn+3 scaled by eps and eps2 (presumably the fijD term). */
666 Geps = eps*VFtab[nnn+2];
667 Heps2 = eps2*VFtab[nnn+3];
670 FF = Fp+Geps+2.0*Heps2;
/* Second table evaluation with the same form (presumably the fijR term; nnn is updated on a line not shown). */
676 Geps = eps*VFtab[nnn+2];
677 Heps2 = eps2*VFtab[nnn+3];
680 FF = Fp+Geps+2.0*Heps2;
/* Accumulate both VdW energy contributions. */
683 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
/* Scalar force factor: vcoul*rinvsq minus the tabulated part scaled by tabscale and rinv11. */
684 fscal = (vcoul)*rinvsq-((fijD+fijR)*tabscale)*rinv11;
/* When force_cap is positive, clamp |fscal| to force_cap while keeping its sign. */
686 if(force_cap>0 && (fabs(fscal)> force_cap)){
687 fscal=force_cap*fscal/fabs(fscal);
/* Start the j-site 1 accumulator from faction[j3..j3+2] minus tx,ty,tz (computed on lines not shown). */
695 fjx1 = faction[j3+0] - tx;
696 fjy1 = faction[j3+1] - ty;
697 fjz1 = faction[j3+2] - tz;
/* Pair 1-2: fscal is vcoul*rinvsq only, then the same force_cap clamp. */
699 rinvsq = rinv12*rinv12;
702 fscal = (vcoul)*rinvsq;
704 if(force_cap>0 && (fabs(fscal)> force_cap)){
705 fscal=force_cap*fscal/fabs(fscal);
/* Start the j-site 2 accumulator from faction[j3+3..j3+5]. */
713 fjx2 = faction[j3+3] - tx;
714 fjy2 = faction[j3+4] - ty;
715 fjz2 = faction[j3+5] - tz;
/* Pair 1-3. */
717 rinvsq = rinv13*rinv13;
720 fscal = (vcoul)*rinvsq;
722 if(force_cap>0 && (fabs(fscal)> force_cap)){
723 fscal=force_cap*fscal/fabs(fscal);
/* Start the j-site 3 accumulator from faction[j3+6..j3+8]. */
731 fjx3 = faction[j3+6] - tx;
732 fjy3 = faction[j3+7] - ty;
733 fjz3 = faction[j3+8] - tz;
/* Pair 2-1. */
735 rinvsq = rinv21*rinv21;
738 fscal = (vcoul)*rinvsq;
740 if(force_cap>0 && (fabs(fscal)> force_cap)){
741 fscal=force_cap*fscal/fabs(fscal);
/* Pair 2-2. */
753 rinvsq = rinv22*rinv22;
756 fscal = (vcoul)*rinvsq;
758 if(force_cap>0 && (fabs(fscal)> force_cap)){
759 fscal=force_cap*fscal/fabs(fscal);
/* Pair 2-3. */
771 rinvsq = rinv23*rinv23;
774 fscal = (vcoul)*rinvsq;
776 if(force_cap>0 && (fabs(fscal)> force_cap)){
777 fscal=force_cap*fscal/fabs(fscal);
/* Pair 3-1. */
789 rinvsq = rinv31*rinv31;
792 fscal = (vcoul)*rinvsq;
794 if(force_cap>0 && (fabs(fscal)> force_cap)){
795 fscal=force_cap*fscal/fabs(fscal);
/* Last visible pair involving j-site 1: write its accumulated force back to faction. */
803 faction[j3+0] = fjx1 - tx;
804 faction[j3+1] = fjy1 - ty;
805 faction[j3+2] = fjz1 - tz;
/* Pair 3-2. */
807 rinvsq = rinv32*rinv32;
810 fscal = (vcoul)*rinvsq;
812 if(force_cap>0 && (fabs(fscal)> force_cap)){
813 fscal=force_cap*fscal/fabs(fscal);
/* Last visible pair involving j-site 2: write its accumulated force back. */
821 faction[j3+3] = fjx2 - tx;
822 faction[j3+4] = fjy2 - ty;
823 faction[j3+5] = fjz2 - tz;
/* Pair 3-3. */
825 rinvsq = rinv33*rinv33;
828 fscal = (vcoul)*rinvsq;
830 if(force_cap>0 && (fabs(fscal)> force_cap)){
831 fscal=force_cap*fscal/fabs(fscal);
/* Last visible pair involving j-site 3: write its accumulated force back. */
839 faction[j3+6] = fjx3 - tx;
840 faction[j3+7] = fjy3 - ty;
841 faction[j3+8] = fjz3 - tz;
/* Add the i-site force accumulators to faction at ii3..ii3+8. */
844 faction[ii3+0] = faction[ii3+0] + fix1;
845 faction[ii3+1] = faction[ii3+1] + fiy1;
846 faction[ii3+2] = faction[ii3+2] + fiz1;
847 faction[ii3+3] = faction[ii3+3] + fix2;
848 faction[ii3+4] = faction[ii3+4] + fiy2;
849 faction[ii3+5] = faction[ii3+5] + fiz2;
850 faction[ii3+6] = faction[ii3+6] + fix3;
851 faction[ii3+7] = faction[ii3+7] + fiy3;
852 faction[ii3+8] = faction[ii3+8] + fiz3;
/* Add the summed force of the three i sites to fshift at is3..is3+2. */
853 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
854 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
855 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Add the energy totals to the Vc and Vvdw slots selected by ggid. */
857 Vc[ggid] = Vc[ggid] + vctot;
858 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Count the inner iterations just processed. */
859 ninner = ninner + nj1 - nj0;
/* Count the outer iterations covered by this chunk. */
862 nouter = nouter + nn1 - nn0;