2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/*
 * Thresholds compared against weight_product in the kernels of this file:
 * a product below ALMOST_ZERO takes the first branch, a product at or
 * above ALMOST_ONE takes the second, and anything in between falls
 * through to the remaining (scaled) path.
 */
#define ALMOST_ZERO 1e-30
/*
 * The replacement list is parenthesized so that the macro always expands
 * to a single operand; without the outer parentheses an expression such
 * as 0 * ALMOST_ONE would parse as (0*1) - (1e-30).
 * NOTE(review): in IEEE-754 double precision 1-(1e-30) rounds to exactly
 * 1.0, so the ">= ALMOST_ONE" test is effectively ">= 1.0" - confirm that
 * this is the intended threshold.
 */
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel232_adress.h"
32 * Gromacs nonbonded kernel nb_kernel232_adress_cg
33 * Coulomb interaction: Reaction field
34 * VdW interaction: Tabulated
35 * water optimization: pairs of SPC/TIP3P interactions
36 * Calculate forces: yes
38 void nb_kernel232_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
85 real Y,F,Geps,Heps2,Fp,VV;
89 real ix1,iy1,iz1,fix1,fiy1,fiz1;
90 real ix2,iy2,iz2,fix2,fiy2,fiz2;
91 real ix3,iy3,iz3,fix3,fiy3,fiz3;
92 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
93 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
94 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
95 real dx11,dy11,dz11,rsq11,rinv11;
96 real dx12,dy12,dz12,rsq12,rinv12;
97 real dx13,dy13,dz13,rsq13,rinv13;
98 real dx21,dy21,dz21,rsq21,rinv21;
99 real dx22,dy22,dz22,rsq22,rinv22;
100 real dx23,dy23,dz23,rsq23,rinv23;
101 real dx31,dy31,dz31,rsq31,rinv31;
102 real dx32,dy32,dz32,rsq32,rinv32;
103 real dx33,dy33,dz33,rsq33,rinv33;
104 real qO,qH,qqOO,qqOH,qqHH;
106 real weight_cg1, weight_cg2, weight_product;
111 nthreads = *p_nthreads;
115 tabscale = *p_tabscale;
122 tj = 2*(ntype+1)*type[ii];
124 c12 = vdwparam[tj+1];
131 #ifdef GMX_THREAD_SHM_FDECOMP
132 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
134 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
136 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
143 for(n=nn0; (n<nn1); n++)
147 shY = shiftvec[is3+1];
148 shZ = shiftvec[is3+2];
153 ix1 = shX + pos[ii3+0];
154 iy1 = shY + pos[ii3+1];
155 iz1 = shZ + pos[ii3+2];
156 ix2 = shX + pos[ii3+3];
157 iy2 = shY + pos[ii3+4];
158 iz2 = shZ + pos[ii3+5];
159 ix3 = shX + pos[ii3+6];
160 iy3 = shY + pos[ii3+7];
161 iz3 = shZ + pos[ii3+8];
175 for(k=nj0; (k<nj1); k++)
178 weight_cg2 = wf[jnr];
179 weight_product = weight_cg1*weight_cg2;
180 if (weight_product < ALMOST_ZERO) {
183 else if (weight_product >= ALMOST_ONE)
185 /* force is zero, skip this molecule */
190 hybscal = 1.0 - weight_product;
205 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
209 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
213 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
217 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
221 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
225 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
229 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
233 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
237 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
238 rinv11 = 1.0/sqrt(rsq11);
239 rinv12 = 1.0/sqrt(rsq12);
240 rinv13 = 1.0/sqrt(rsq13);
241 rinv21 = 1.0/sqrt(rsq21);
242 rinv22 = 1.0/sqrt(rsq22);
243 rinv23 = 1.0/sqrt(rsq23);
244 rinv31 = 1.0/sqrt(rsq31);
245 rinv32 = 1.0/sqrt(rsq32);
246 rinv33 = 1.0/sqrt(rsq33);
248 rinvsq = rinv11*rinv11;
250 vcoul = qq*(rinv11+krsq-crf);
260 Geps = eps*VFtab[nnn+2];
261 Heps2 = eps2*VFtab[nnn+3];
264 FF = Fp+Geps+2.0*Heps2;
270 Geps = eps*VFtab[nnn+2];
271 Heps2 = eps2*VFtab[nnn+3];
274 FF = Fp+Geps+2.0*Heps2;
277 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
278 fscal = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
286 fjx1 = faction[j3+0] - tx;
287 fjy1 = faction[j3+1] - ty;
288 fjz1 = faction[j3+2] - tz;
290 rinvsq = rinv12*rinv12;
292 vcoul = qq*(rinv12+krsq-crf);
294 fscal = (qq*(rinv12-2.0*krsq))*rinvsq;
302 fjx2 = faction[j3+3] - tx;
303 fjy2 = faction[j3+4] - ty;
304 fjz2 = faction[j3+5] - tz;
306 rinvsq = rinv13*rinv13;
308 vcoul = qq*(rinv13+krsq-crf);
310 fscal = (qq*(rinv13-2.0*krsq))*rinvsq;
318 fjx3 = faction[j3+6] - tx;
319 fjy3 = faction[j3+7] - ty;
320 fjz3 = faction[j3+8] - tz;
322 rinvsq = rinv21*rinv21;
324 vcoul = qq*(rinv21+krsq-crf);
326 fscal = (qq*(rinv21-2.0*krsq))*rinvsq;
338 rinvsq = rinv22*rinv22;
340 vcoul = qq*(rinv22+krsq-crf);
342 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
354 rinvsq = rinv23*rinv23;
356 vcoul = qq*(rinv23+krsq-crf);
358 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
370 rinvsq = rinv31*rinv31;
372 vcoul = qq*(rinv31+krsq-crf);
374 fscal = (qq*(rinv31-2.0*krsq))*rinvsq;
382 faction[j3+0] = fjx1 - tx;
383 faction[j3+1] = fjy1 - ty;
384 faction[j3+2] = fjz1 - tz;
386 rinvsq = rinv32*rinv32;
388 vcoul = qq*(rinv32+krsq-crf);
390 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
398 faction[j3+3] = fjx2 - tx;
399 faction[j3+4] = fjy2 - ty;
400 faction[j3+5] = fjz2 - tz;
402 rinvsq = rinv33*rinv33;
404 vcoul = qq*(rinv33+krsq-crf);
406 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
414 faction[j3+6] = fjx3 - tx;
415 faction[j3+7] = fjy3 - ty;
416 faction[j3+8] = fjz3 - tz;
419 faction[ii3+0] = faction[ii3+0] + fix1;
420 faction[ii3+1] = faction[ii3+1] + fiy1;
421 faction[ii3+2] = faction[ii3+2] + fiz1;
422 faction[ii3+3] = faction[ii3+3] + fix2;
423 faction[ii3+4] = faction[ii3+4] + fiy2;
424 faction[ii3+5] = faction[ii3+5] + fiz2;
425 faction[ii3+6] = faction[ii3+6] + fix3;
426 faction[ii3+7] = faction[ii3+7] + fiy3;
427 faction[ii3+8] = faction[ii3+8] + fiz3;
428 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
429 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
430 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
432 Vc[ggid] = Vc[ggid] + vctot;
433 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
434 ninner = ninner + nj1 - nj0;
437 nouter = nouter + nn1 - nn0;
450 * Gromacs nonbonded kernel nb_kernel232_adress_ex
451 * Coulomb interaction: Reaction field
452 * VdW interaction: Tabulated
453 * water optimization: pairs of SPC/TIP3P interactions
454 * Calculate forces: yes
456 void nb_kernel232_adress_ex(
490 int nri,ntype,nthreads;
491 real facel,krf,crf,tabscale,gbtabscale;
492 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
493 int nn0,nn1,nouter,ninner;
503 real Y,F,Geps,Heps2,Fp,VV;
507 real ix1,iy1,iz1,fix1,fiy1,fiz1;
508 real ix2,iy2,iz2,fix2,fiy2,fiz2;
509 real ix3,iy3,iz3,fix3,fiy3,fiz3;
510 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
511 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
512 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
513 real dx11,dy11,dz11,rsq11,rinv11;
514 real dx12,dy12,dz12,rsq12,rinv12;
515 real dx13,dy13,dz13,rsq13,rinv13;
516 real dx21,dy21,dz21,rsq21,rinv21;
517 real dx22,dy22,dz22,rsq22,rinv22;
518 real dx23,dy23,dz23,rsq23,rinv23;
519 real dx31,dy31,dz31,rsq31,rinv31;
520 real dx32,dy32,dz32,rsq32,rinv32;
521 real dx33,dy33,dz33,rsq33,rinv33;
522 real qO,qH,qqOO,qqOH,qqHH;
524 real weight_cg1, weight_cg2, weight_product;
529 nthreads = *p_nthreads;
533 tabscale = *p_tabscale;
540 tj = 2*(ntype+1)*type[ii];
542 c12 = vdwparam[tj+1];
549 #ifdef GMX_THREAD_SHM_FDECOMP
550 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
552 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
554 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
561 for(n=nn0; (n<nn1); n++)
565 shY = shiftvec[is3+1];
566 shZ = shiftvec[is3+2];
571 ix1 = shX + pos[ii3+0];
572 iy1 = shY + pos[ii3+1];
573 iz1 = shZ + pos[ii3+2];
574 ix2 = shX + pos[ii3+3];
575 iy2 = shY + pos[ii3+4];
576 iz2 = shZ + pos[ii3+5];
577 ix3 = shX + pos[ii3+6];
578 iy3 = shY + pos[ii3+7];
579 iz3 = shZ + pos[ii3+8];
593 for(k=nj0; (k<nj1); k++)
596 weight_cg2 = wf[jnr];
597 weight_product = weight_cg1*weight_cg2;
598 if (weight_product < ALMOST_ZERO) {
599 /* force is zero, skip this molecule */
602 else if (weight_product >= ALMOST_ONE)
608 hybscal = weight_product;
623 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
627 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
631 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
635 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
639 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
643 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
647 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
651 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
655 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
656 rinv11 = 1.0/sqrt(rsq11);
657 rinv12 = 1.0/sqrt(rsq12);
658 rinv13 = 1.0/sqrt(rsq13);
659 rinv21 = 1.0/sqrt(rsq21);
660 rinv22 = 1.0/sqrt(rsq22);
661 rinv23 = 1.0/sqrt(rsq23);
662 rinv31 = 1.0/sqrt(rsq31);
663 rinv32 = 1.0/sqrt(rsq32);
664 rinv33 = 1.0/sqrt(rsq33);
666 rinvsq = rinv11*rinv11;
668 vcoul = qq*(rinv11+krsq-crf);
678 Geps = eps*VFtab[nnn+2];
679 Heps2 = eps2*VFtab[nnn+3];
682 FF = Fp+Geps+2.0*Heps2;
688 Geps = eps*VFtab[nnn+2];
689 Heps2 = eps2*VFtab[nnn+3];
692 FF = Fp+Geps+2.0*Heps2;
695 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
696 fscal = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
698 if(force_cap>0 && (fabs(fscal)> force_cap)){
699 fscal=force_cap*fscal/fabs(fscal);
707 fjx1 = faction[j3+0] - tx;
708 fjy1 = faction[j3+1] - ty;
709 fjz1 = faction[j3+2] - tz;
711 rinvsq = rinv12*rinv12;
713 vcoul = qq*(rinv12+krsq-crf);
715 fscal = (qq*(rinv12-2.0*krsq))*rinvsq;
717 if(force_cap>0 && (fabs(fscal)> force_cap)){
718 fscal=force_cap*fscal/fabs(fscal);
726 fjx2 = faction[j3+3] - tx;
727 fjy2 = faction[j3+4] - ty;
728 fjz2 = faction[j3+5] - tz;
730 rinvsq = rinv13*rinv13;
732 vcoul = qq*(rinv13+krsq-crf);
734 fscal = (qq*(rinv13-2.0*krsq))*rinvsq;
736 if(force_cap>0 && (fabs(fscal)> force_cap)){
737 fscal=force_cap*fscal/fabs(fscal);
745 fjx3 = faction[j3+6] - tx;
746 fjy3 = faction[j3+7] - ty;
747 fjz3 = faction[j3+8] - tz;
749 rinvsq = rinv21*rinv21;
751 vcoul = qq*(rinv21+krsq-crf);
753 fscal = (qq*(rinv21-2.0*krsq))*rinvsq;
755 if(force_cap>0 && (fabs(fscal)> force_cap)){
756 fscal=force_cap*fscal/fabs(fscal);
768 rinvsq = rinv22*rinv22;
770 vcoul = qq*(rinv22+krsq-crf);
772 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
774 if(force_cap>0 && (fabs(fscal)> force_cap)){
775 fscal=force_cap*fscal/fabs(fscal);
787 rinvsq = rinv23*rinv23;
789 vcoul = qq*(rinv23+krsq-crf);
791 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
793 if(force_cap>0 && (fabs(fscal)> force_cap)){
794 fscal=force_cap*fscal/fabs(fscal);
806 rinvsq = rinv31*rinv31;
808 vcoul = qq*(rinv31+krsq-crf);
810 fscal = (qq*(rinv31-2.0*krsq))*rinvsq;
812 if(force_cap>0 && (fabs(fscal)> force_cap)){
813 fscal=force_cap*fscal/fabs(fscal);
821 faction[j3+0] = fjx1 - tx;
822 faction[j3+1] = fjy1 - ty;
823 faction[j3+2] = fjz1 - tz;
825 rinvsq = rinv32*rinv32;
827 vcoul = qq*(rinv32+krsq-crf);
829 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
831 if(force_cap>0 && (fabs(fscal)> force_cap)){
832 fscal=force_cap*fscal/fabs(fscal);
840 faction[j3+3] = fjx2 - tx;
841 faction[j3+4] = fjy2 - ty;
842 faction[j3+5] = fjz2 - tz;
844 rinvsq = rinv33*rinv33;
846 vcoul = qq*(rinv33+krsq-crf);
848 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
850 if(force_cap>0 && (fabs(fscal)> force_cap)){
851 fscal=force_cap*fscal/fabs(fscal);
859 faction[j3+6] = fjx3 - tx;
860 faction[j3+7] = fjy3 - ty;
861 faction[j3+8] = fjz3 - tz;
864 faction[ii3+0] = faction[ii3+0] + fix1;
865 faction[ii3+1] = faction[ii3+1] + fiy1;
866 faction[ii3+2] = faction[ii3+2] + fiz1;
867 faction[ii3+3] = faction[ii3+3] + fix2;
868 faction[ii3+4] = faction[ii3+4] + fiy2;
869 faction[ii3+5] = faction[ii3+5] + fiz2;
870 faction[ii3+6] = faction[ii3+6] + fix3;
871 faction[ii3+7] = faction[ii3+7] + fiy3;
872 faction[ii3+8] = faction[ii3+8] + fiz3;
873 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
874 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
875 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
877 Vc[ggid] = Vc[ggid] + vctot;
878 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
879 ninner = ninner + nj1 - nj0;
882 nouter = nouter + nn1 - nn0;