2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/*
 * Thresholds that the kernels in this file compare weight_product against.
 * ALMOST_ZERO is the lower cut-off, ALMOST_ONE the upper cut-off.
 */
#define ALMOST_ZERO 1e-30
/*
 * The replacement list is fully parenthesized so the macro behaves as a
 * single operand inside any surrounding expression (e.g. 1.0 - ALMOST_ONE).
 * Note: with IEEE-754 doubles 1e-30 is far below the spacing of values
 * near 1, so this expression evaluates to exactly 1.0.
 */
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel333_adress.h"
32 * Gromacs nonbonded kernel nb_kernel333_adress_cg
33 * Coulomb interaction: Tabulated
34 * VdW interaction: Tabulated
35 * water optimization: TIP4P - other atoms
36 * Calculate forces: yes
/*
 * nb_kernel333_adress_cg: the "_cg" variant of nonbonded kernel 333.
 * Per the header comment above: tabulated Coulomb and VdW, TIP4P water
 * against other atoms, forces calculated.
 * NOTE(review): only part of this function is visible in this excerpt
 * (parameter list, braces and many assignments are missing); the comments
 * below describe the visible statements only.
 */
38 void nb_kernel333_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Table interpolation temporaries. */
86 real Y,F,Geps,Heps2,Fp,VV;
/* Coordinates and force accumulators for the four i-sites (suffix 1..4). */
90 real ix1,iy1,iz1,fix1,fiy1,fiz1;
91 real ix2,iy2,iz2,fix2,fiy2,fiz2;
92 real ix3,iy3,iz3,fix3,fiy3,fiz3;
93 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* Coordinates and force accumulator for the single j-atom. */
94 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
/* Displacement, squared distance and inverse distance for each i-site/j pair. */
95 real dx11,dy11,dz11,rsq11,rinv11;
96 real dx21,dy21,dz21,rsq21,rinv21;
97 real dx31,dy31,dz31,rsq31,rinv31;
98 real dx41,dy41,dz41,rsq41,rinv41;
101 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar arguments that are passed by pointer into locals. */
106 nthreads = *p_nthreads;
110 tabscale = *p_tabscale;
/* Charges at indices ii+1 and ii+3, pre-multiplied by facel. */
112 qH = facel*charge[ii+1];
113 qM = facel*charge[ii+3];
/* Row offset into vdwparam for the type of atom ii; tj below adds the j-type column. */
114 nti = 2*ntype*type[ii];
/* With GMX_THREAD_SHM_FDECOMP defined, nn1 is computed between mutex lock and unlock. */
121 #ifdef GMX_THREAD_SHM_FDECOMP
122 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
124 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
126 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 .. nn1-1. */
133 for(n=nn0; (n<nn1); n++)
/* Shift vector components for this entry (shX is read on a line outside this excerpt). */
137 shY = shiftvec[is3+1];
138 shZ = shiftvec[is3+2];
/* Shifted coordinates of the four i-sites: 12 consecutive values starting at pos[ii3]. */
143 ix1 = shX + pos[ii3+0];
144 iy1 = shY + pos[ii3+1];
145 iz1 = shZ + pos[ii3+2];
146 ix2 = shX + pos[ii3+3];
147 iy2 = shY + pos[ii3+4];
148 iz2 = shZ + pos[ii3+5];
149 ix3 = shX + pos[ii3+6];
150 iy3 = shY + pos[ii3+7];
151 iz3 = shZ + pos[ii3+8];
152 ix4 = shX + pos[ii3+9];
153 iy4 = shY + pos[ii3+10];
154 iz4 = shZ + pos[ii3+11];
/* Inner loop over entries nj0 .. nj1-1. */
171 for(k=nj0; (k<nj1); k++)
/*
 * Weight of the j entry and its product with weight_cg1 (set outside this
 * excerpt). The product is tested against ALMOST_ZERO and ALMOST_ONE; the
 * branch bodies are mostly not visible here.
 * NOTE(review): the existing comment appears to mark the >= ALMOST_ONE
 * branch as the skip case, with hybscal = 1 - weight_product used
 * otherwise - confirm against the full file.
 */
174 weight_cg2 = wf[jnr];
175 weight_product = weight_cg1*weight_cg2;
176 if (weight_product < ALMOST_ZERO) {
179 else if (weight_product >= ALMOST_ONE)
181 /* force is zero, skip this molecule */
186 hybscal = 1.0 - weight_product;
/* Squared distances between each i-site and the j-atom. */
195 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
199 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
203 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
207 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
/* Inverse distances. */
208 rinv11 = 1.0/sqrt(rsq11);
209 rinv21 = 1.0/sqrt(rsq21);
210 rinv31 = 1.0/sqrt(rsq31);
211 rinv41 = 1.0/sqrt(rsq41);
/* VdW parameter index for this type pair; c12 is the second entry of the pair. */
212 tj = nti+2*type[jnr];
214 c12 = vdwparam[tj+1];
/* First table evaluation for the site-1 pair: FF = Fp + Geps + 2*Heps2. */
224 Geps = eps*VFtab[nnn+2];
225 Heps2 = eps2*VFtab[nnn+3];
228 FF = Fp+Geps+2.0*Heps2;
/* Second table evaluation for the site-1 pair (nnn is advanced outside this excerpt). */
234 Geps = eps*VFtab[nnn+2];
235 Heps2 = eps2*VFtab[nnn+3];
238 FF = Fp+Geps+2.0*Heps2;
/* Accumulate VdW energy; scalar force from fijD+fijR, scaled by tabscale and 1/r. */
241 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
242 fscal = -((fijD+fijR)*tabscale)*rinv11;
/* Start the j-atom force from the stored value minus the site-1 contribution (tx,ty,tz). */
250 fjx1 = faction[j3+0] - tx;
251 fjy1 = faction[j3+1] - ty;
252 fjz1 = faction[j3+2] - tz;
/* Site 2: table evaluation, Coulomb energy accumulation, scalar force using rinv21. */
263 Geps = eps*VFtab[nnn+2];
264 Heps2 = eps2*VFtab[nnn+3];
267 FF = Fp+Geps+2.0*Heps2;
270 vctot = vctot + vcoul;
271 fscal = -((fijC)*tabscale)*rinv21;
/* Site 3: same sequence using rinv31. */
290 Geps = eps*VFtab[nnn+2];
291 Heps2 = eps2*VFtab[nnn+3];
294 FF = Fp+Geps+2.0*Heps2;
297 vctot = vctot + vcoul;
298 fscal = -((fijC)*tabscale)*rinv31;
/* Site 4: same sequence using rinv41. */
318 Geps = eps*VFtab[nnn+2];
319 Heps2 = eps2*VFtab[nnn+3];
322 FF = Fp+Geps+2.0*Heps2;
325 vctot = vctot + vcoul;
326 fscal = -((fijC)*tabscale)*rinv41;
/* Write the j-atom force back after subtracting the last contribution. */
334 faction[j3+0] = fjx1 - tx;
335 faction[j3+1] = fjy1 - ty;
336 faction[j3+2] = fjz1 - tz;
/* Add the accumulated i-site forces to the 12 force entries starting at faction[ii3]. */
339 faction[ii3+0] = faction[ii3+0] + fix1;
340 faction[ii3+1] = faction[ii3+1] + fiy1;
341 faction[ii3+2] = faction[ii3+2] + fiz1;
342 faction[ii3+3] = faction[ii3+3] + fix2;
343 faction[ii3+4] = faction[ii3+4] + fiy2;
344 faction[ii3+5] = faction[ii3+5] + fiz2;
345 faction[ii3+6] = faction[ii3+6] + fix3;
346 faction[ii3+7] = faction[ii3+7] + fiy3;
347 faction[ii3+8] = faction[ii3+8] + fiz3;
348 faction[ii3+9] = faction[ii3+9] + fix4;
349 faction[ii3+10] = faction[ii3+10] + fiy4;
350 faction[ii3+11] = faction[ii3+11] + fiz4;
/* Sum of all four i-site forces goes into the shift-force entry at is3. */
351 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
352 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
353 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* Add the accumulated energies to the totals indexed by ggid. */
355 Vc[ggid] = Vc[ggid] + vctot;
356 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Running counts of inner and outer iterations performed. */
357 ninner = ninner + nj1 - nj0;
360 nouter = nouter + nn1 - nn0;
373 * Gromacs nonbonded kernel nb_kernel333_adress_ex
374 * Coulomb interaction: Tabulated
375 * VdW interaction: Tabulated
376 * water optimization: TIP4P - other atoms
377 * Calculate forces: yes
/*
 * nb_kernel333_adress_ex: the "_ex" variant of nonbonded kernel 333.
 * Per the header comment above: tabulated Coulomb and VdW, TIP4P water
 * against other atoms, forces calculated.
 * NOTE(review): only part of this function is visible in this excerpt
 * (parameter list, braces and many assignments are missing); the comments
 * below describe the visible statements only.
 */
379 void nb_kernel333_adress_ex(
413 int nri,ntype,nthreads;
414 real facel,krf,crf,tabscale,gbtabscale;
415 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
416 int nn0,nn1,nouter,ninner;
/* Table interpolation temporaries. */
427 real Y,F,Geps,Heps2,Fp,VV;
/* Coordinates and force accumulators for the four i-sites (suffix 1..4). */
431 real ix1,iy1,iz1,fix1,fiy1,fiz1;
432 real ix2,iy2,iz2,fix2,fiy2,fiz2;
433 real ix3,iy3,iz3,fix3,fiy3,fiz3;
434 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* Coordinates and force accumulator for the single j-atom. */
435 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
/* Displacement, squared distance and inverse distance for each i-site/j pair. */
436 real dx11,dy11,dz11,rsq11,rinv11;
437 real dx21,dy21,dz21,rsq21,rinv21;
438 real dx31,dy31,dz31,rsq31,rinv31;
439 real dx41,dy41,dz41,rsq41,rinv41;
442 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar arguments that are passed by pointer into locals. */
447 nthreads = *p_nthreads;
451 tabscale = *p_tabscale;
/* Charges at indices ii+1 and ii+3, pre-multiplied by facel. */
453 qH = facel*charge[ii+1];
454 qM = facel*charge[ii+3];
/* Row offset into vdwparam for the type of atom ii; tj below adds the j-type column. */
455 nti = 2*ntype*type[ii];
/* With GMX_THREAD_SHM_FDECOMP defined, nn1 is computed between mutex lock and unlock. */
462 #ifdef GMX_THREAD_SHM_FDECOMP
463 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
465 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
467 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 .. nn1-1. */
474 for(n=nn0; (n<nn1); n++)
/* Shift vector components for this entry (shX is read on a line outside this excerpt). */
478 shY = shiftvec[is3+1];
479 shZ = shiftvec[is3+2];
/* Shifted coordinates of the four i-sites: 12 consecutive values starting at pos[ii3]. */
484 ix1 = shX + pos[ii3+0];
485 iy1 = shY + pos[ii3+1];
486 iz1 = shZ + pos[ii3+2];
487 ix2 = shX + pos[ii3+3];
488 iy2 = shY + pos[ii3+4];
489 iz2 = shZ + pos[ii3+5];
490 ix3 = shX + pos[ii3+6];
491 iy3 = shY + pos[ii3+7];
492 iz3 = shZ + pos[ii3+8];
493 ix4 = shX + pos[ii3+9];
494 iy4 = shY + pos[ii3+10];
495 iz4 = shZ + pos[ii3+11];
/* Inner loop over entries nj0 .. nj1-1. */
512 for(k=nj0; (k<nj1); k++)
/*
 * Weight of the j entry and its product with weight_cg1 (set outside this
 * excerpt). The product is tested against ALMOST_ZERO and ALMOST_ONE; the
 * branch bodies are mostly not visible here.
 * NOTE(review): the existing comment appears to mark the < ALMOST_ZERO
 * branch as the skip case, with hybscal = weight_product used otherwise -
 * confirm against the full file.
 */
515 weight_cg2 = wf[jnr];
516 weight_product = weight_cg1*weight_cg2;
517 if (weight_product < ALMOST_ZERO) {
518 /* force is zero, skip this molecule */
521 else if (weight_product >= ALMOST_ONE)
527 hybscal = weight_product;
/* Squared distances between each i-site and the j-atom. */
536 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
540 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
544 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
548 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
/* Inverse distances. */
549 rinv11 = 1.0/sqrt(rsq11);
550 rinv21 = 1.0/sqrt(rsq21);
551 rinv31 = 1.0/sqrt(rsq31);
552 rinv41 = 1.0/sqrt(rsq41);
/* VdW parameter index for this type pair; c12 is the second entry of the pair. */
553 tj = nti+2*type[jnr];
555 c12 = vdwparam[tj+1];
/* First table evaluation for the site-1 pair: FF = Fp + Geps + 2*Heps2. */
565 Geps = eps*VFtab[nnn+2];
566 Heps2 = eps2*VFtab[nnn+3];
569 FF = Fp+Geps+2.0*Heps2;
/* Second table evaluation for the site-1 pair (nnn is advanced outside this excerpt). */
575 Geps = eps*VFtab[nnn+2];
576 Heps2 = eps2*VFtab[nnn+3];
579 FF = Fp+Geps+2.0*Heps2;
/* Accumulate VdW energy; scalar force from fijD+fijR, scaled by tabscale and 1/r. */
582 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
583 fscal = -((fijD+fijR)*tabscale)*rinv11;
/* Force cap: if force_cap is positive and |fscal| exceeds it, clamp the magnitude to force_cap, keeping the sign. */
585 if(force_cap>0 && (fabs(fscal)> force_cap)){
586 fscal=force_cap*fscal/fabs(fscal);
/* Start the j-atom force from the stored value minus the site-1 contribution (tx,ty,tz). */
594 fjx1 = faction[j3+0] - tx;
595 fjy1 = faction[j3+1] - ty;
596 fjz1 = faction[j3+2] - tz;
/* Site 2: table evaluation, Coulomb energy accumulation, scalar force using rinv21, then the same force cap. */
607 Geps = eps*VFtab[nnn+2];
608 Heps2 = eps2*VFtab[nnn+3];
611 FF = Fp+Geps+2.0*Heps2;
614 vctot = vctot + vcoul;
615 fscal = -((fijC)*tabscale)*rinv21;
617 if(force_cap>0 && (fabs(fscal)> force_cap)){
618 fscal=force_cap*fscal/fabs(fscal);
/* Site 3: same sequence using rinv31. */
637 Geps = eps*VFtab[nnn+2];
638 Heps2 = eps2*VFtab[nnn+3];
641 FF = Fp+Geps+2.0*Heps2;
644 vctot = vctot + vcoul;
645 fscal = -((fijC)*tabscale)*rinv31;
647 if(force_cap>0 && (fabs(fscal)> force_cap)){
648 fscal=force_cap*fscal/fabs(fscal);
/* Site 4: same sequence using rinv41. */
668 Geps = eps*VFtab[nnn+2];
669 Heps2 = eps2*VFtab[nnn+3];
672 FF = Fp+Geps+2.0*Heps2;
675 vctot = vctot + vcoul;
676 fscal = -((fijC)*tabscale)*rinv41;
678 if(force_cap>0 && (fabs(fscal)> force_cap)){
679 fscal=force_cap*fscal/fabs(fscal);
/* Write the j-atom force back after subtracting the last contribution. */
687 faction[j3+0] = fjx1 - tx;
688 faction[j3+1] = fjy1 - ty;
689 faction[j3+2] = fjz1 - tz;
/* Add the accumulated i-site forces to the 12 force entries starting at faction[ii3]. */
692 faction[ii3+0] = faction[ii3+0] + fix1;
693 faction[ii3+1] = faction[ii3+1] + fiy1;
694 faction[ii3+2] = faction[ii3+2] + fiz1;
695 faction[ii3+3] = faction[ii3+3] + fix2;
696 faction[ii3+4] = faction[ii3+4] + fiy2;
697 faction[ii3+5] = faction[ii3+5] + fiz2;
698 faction[ii3+6] = faction[ii3+6] + fix3;
699 faction[ii3+7] = faction[ii3+7] + fiy3;
700 faction[ii3+8] = faction[ii3+8] + fiz3;
701 faction[ii3+9] = faction[ii3+9] + fix4;
702 faction[ii3+10] = faction[ii3+10] + fiy4;
703 faction[ii3+11] = faction[ii3+11] + fiz4;
/* Sum of all four i-site forces goes into the shift-force entry at is3. */
704 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
705 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
706 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
/* Add the accumulated energies to the totals indexed by ggid. */
708 Vc[ggid] = Vc[ggid] + vctot;
709 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Running counts of inner and outer iterations performed. */
710 ninner = ninner + nj1 - nj0;
713 nouter = nouter + nn1 - nn0;