2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generation this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel313_adress.h"
32 * Gromacs nonbonded kernel nb_kernel313_adress_cg
33 * Coulomb interaction: Tabulated
34 * VdW interaction: Lennard-Jones
35 * water optimization: TIP4P - other atoms
36 * Calculate forces: yes
38 void nb_kernel313_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
88 real Y,F,Geps,Heps2,Fp,VV;
91 real ix1,iy1,iz1,fix1,fiy1,fiz1;
92 real ix2,iy2,iz2,fix2,fiy2,fiz2;
93 real ix3,iy3,iz3,fix3,fiy3,fiz3;
94 real ix4,iy4,iz4,fix4,fiy4,fiz4;
95 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
96 real dx11,dy11,dz11,rsq11;
97 real dx21,dy21,dz21,rsq21,rinv21;
98 real dx31,dy31,dz31,rsq31,rinv31;
99 real dx41,dy41,dz41,rsq41,rinv41;
102 real weight_cg1, weight_cg2, weight_product;
107 nthreads = *p_nthreads;
111 tabscale = *p_tabscale;
113 qH = facel*charge[ii+1];
114 qM = facel*charge[ii+3];
115 nti = 2*ntype*type[ii];
122 #ifdef GMX_THREAD_SHM_FDECOMP
123 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
125 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
127 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
134 for(n=nn0; (n<nn1); n++)
138 shY = shiftvec[is3+1];
139 shZ = shiftvec[is3+2];
144 ix1 = shX + pos[ii3+0];
145 iy1 = shY + pos[ii3+1];
146 iz1 = shZ + pos[ii3+2];
147 ix2 = shX + pos[ii3+3];
148 iy2 = shY + pos[ii3+4];
149 iz2 = shZ + pos[ii3+5];
150 ix3 = shX + pos[ii3+6];
151 iy3 = shY + pos[ii3+7];
152 iz3 = shZ + pos[ii3+8];
153 ix4 = shX + pos[ii3+9];
154 iy4 = shY + pos[ii3+10];
155 iz4 = shZ + pos[ii3+11];
172 for(k=nj0; (k<nj1); k++)
175 weight_cg2 = wf[jnr];
176 weight_product = weight_cg1*weight_cg2;
177 if (weight_product < ALMOST_ZERO) {
180 else if (weight_product >= ALMOST_ONE)
182 /* force is zero, skip this molecule */
187 hybscal = 1.0 - weight_product;
196 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
200 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
204 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
208 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
210 rinv21 = 1.0/sqrt(rsq21);
211 rinv31 = 1.0/sqrt(rsq31);
212 rinv41 = 1.0/sqrt(rsq41);
213 tj = nti+2*type[jnr];
215 c12 = vdwparam[tj+1];
216 rinvsix = rinvsq*rinvsq*rinvsq;
218 Vvdw12 = c12*rinvsix*rinvsix;
219 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
220 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
228 fjx1 = faction[j3+0] - tx;
229 fjy1 = faction[j3+1] - ty;
230 fjz1 = faction[j3+2] - tz;
241 Geps = eps*VFtab[nnn+2];
242 Heps2 = eps2*VFtab[nnn+3];
245 FF = Fp+Geps+2.0*Heps2;
248 vctot = vctot + vcoul;
249 fscal = -((fijC)*tabscale)*rinv21;
268 Geps = eps*VFtab[nnn+2];
269 Heps2 = eps2*VFtab[nnn+3];
272 FF = Fp+Geps+2.0*Heps2;
275 vctot = vctot + vcoul;
276 fscal = -((fijC)*tabscale)*rinv31;
296 Geps = eps*VFtab[nnn+2];
297 Heps2 = eps2*VFtab[nnn+3];
300 FF = Fp+Geps+2.0*Heps2;
303 vctot = vctot + vcoul;
304 fscal = -((fijC)*tabscale)*rinv41;
312 faction[j3+0] = fjx1 - tx;
313 faction[j3+1] = fjy1 - ty;
314 faction[j3+2] = fjz1 - tz;
317 faction[ii3+0] = faction[ii3+0] + fix1;
318 faction[ii3+1] = faction[ii3+1] + fiy1;
319 faction[ii3+2] = faction[ii3+2] + fiz1;
320 faction[ii3+3] = faction[ii3+3] + fix2;
321 faction[ii3+4] = faction[ii3+4] + fiy2;
322 faction[ii3+5] = faction[ii3+5] + fiz2;
323 faction[ii3+6] = faction[ii3+6] + fix3;
324 faction[ii3+7] = faction[ii3+7] + fiy3;
325 faction[ii3+8] = faction[ii3+8] + fiz3;
326 faction[ii3+9] = faction[ii3+9] + fix4;
327 faction[ii3+10] = faction[ii3+10] + fiy4;
328 faction[ii3+11] = faction[ii3+11] + fiz4;
329 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
330 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
331 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
333 Vc[ggid] = Vc[ggid] + vctot;
334 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
335 ninner = ninner + nj1 - nj0;
338 nouter = nouter + nn1 - nn0;
351 * Gromacs nonbonded kernel nb_kernel313_adress_ex
352 * Coulomb interaction: Tabulated
353 * VdW interaction: Lennard-Jones
354 * water optimization: TIP4P - other atoms
355 * Calculate forces: yes
357 void nb_kernel313_adress_ex(
391 int nri,ntype,nthreads;
392 real facel,krf,crf,tabscale,gbtabscale;
393 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
394 int nn0,nn1,nouter,ninner;
407 real Y,F,Geps,Heps2,Fp,VV;
410 real ix1,iy1,iz1,fix1,fiy1,fiz1;
411 real ix2,iy2,iz2,fix2,fiy2,fiz2;
412 real ix3,iy3,iz3,fix3,fiy3,fiz3;
413 real ix4,iy4,iz4,fix4,fiy4,fiz4;
414 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
415 real dx11,dy11,dz11,rsq11;
416 real dx21,dy21,dz21,rsq21,rinv21;
417 real dx31,dy31,dz31,rsq31,rinv31;
418 real dx41,dy41,dz41,rsq41,rinv41;
421 real weight_cg1, weight_cg2, weight_product;
426 nthreads = *p_nthreads;
430 tabscale = *p_tabscale;
432 qH = facel*charge[ii+1];
433 qM = facel*charge[ii+3];
434 nti = 2*ntype*type[ii];
441 #ifdef GMX_THREAD_SHM_FDECOMP
442 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
444 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
446 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
453 for(n=nn0; (n<nn1); n++)
457 shY = shiftvec[is3+1];
458 shZ = shiftvec[is3+2];
463 ix1 = shX + pos[ii3+0];
464 iy1 = shY + pos[ii3+1];
465 iz1 = shZ + pos[ii3+2];
466 ix2 = shX + pos[ii3+3];
467 iy2 = shY + pos[ii3+4];
468 iz2 = shZ + pos[ii3+5];
469 ix3 = shX + pos[ii3+6];
470 iy3 = shY + pos[ii3+7];
471 iz3 = shZ + pos[ii3+8];
472 ix4 = shX + pos[ii3+9];
473 iy4 = shY + pos[ii3+10];
474 iz4 = shZ + pos[ii3+11];
491 for(k=nj0; (k<nj1); k++)
494 weight_cg2 = wf[jnr];
495 weight_product = weight_cg1*weight_cg2;
496 if (weight_product < ALMOST_ZERO) {
497 /* force is zero, skip this molecule */
500 else if (weight_product >= ALMOST_ONE)
506 hybscal = weight_product;
515 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
519 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
523 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
527 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
529 rinv21 = 1.0/sqrt(rsq21);
530 rinv31 = 1.0/sqrt(rsq31);
531 rinv41 = 1.0/sqrt(rsq41);
532 tj = nti+2*type[jnr];
534 c12 = vdwparam[tj+1];
535 rinvsix = rinvsq*rinvsq*rinvsq;
537 Vvdw12 = c12*rinvsix*rinvsix;
538 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
539 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
541 if(force_cap>0 && (fabs(fscal)> force_cap)){
542 fscal=force_cap*fscal/fabs(fscal);
550 fjx1 = faction[j3+0] - tx;
551 fjy1 = faction[j3+1] - ty;
552 fjz1 = faction[j3+2] - tz;
563 Geps = eps*VFtab[nnn+2];
564 Heps2 = eps2*VFtab[nnn+3];
567 FF = Fp+Geps+2.0*Heps2;
570 vctot = vctot + vcoul;
571 fscal = -((fijC)*tabscale)*rinv21;
573 if(force_cap>0 && (fabs(fscal)> force_cap)){
574 fscal=force_cap*fscal/fabs(fscal);
593 Geps = eps*VFtab[nnn+2];
594 Heps2 = eps2*VFtab[nnn+3];
597 FF = Fp+Geps+2.0*Heps2;
600 vctot = vctot + vcoul;
601 fscal = -((fijC)*tabscale)*rinv31;
603 if(force_cap>0 && (fabs(fscal)> force_cap)){
604 fscal=force_cap*fscal/fabs(fscal);
624 Geps = eps*VFtab[nnn+2];
625 Heps2 = eps2*VFtab[nnn+3];
628 FF = Fp+Geps+2.0*Heps2;
631 vctot = vctot + vcoul;
632 fscal = -((fijC)*tabscale)*rinv41;
634 if(force_cap>0 && (fabs(fscal)> force_cap)){
635 fscal=force_cap*fscal/fabs(fscal);
643 faction[j3+0] = fjx1 - tx;
644 faction[j3+1] = fjy1 - ty;
645 faction[j3+2] = fjz1 - tz;
648 faction[ii3+0] = faction[ii3+0] + fix1;
649 faction[ii3+1] = faction[ii3+1] + fiy1;
650 faction[ii3+2] = faction[ii3+2] + fiz1;
651 faction[ii3+3] = faction[ii3+3] + fix2;
652 faction[ii3+4] = faction[ii3+4] + fiy2;
653 faction[ii3+5] = faction[ii3+5] + fiz2;
654 faction[ii3+6] = faction[ii3+6] + fix3;
655 faction[ii3+7] = faction[ii3+7] + fiy3;
656 faction[ii3+8] = faction[ii3+8] + fiz3;
657 faction[ii3+9] = faction[ii3+9] + fix4;
658 faction[ii3+10] = faction[ii3+10] + fiy4;
659 faction[ii3+11] = faction[ii3+11] + fiz4;
660 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
661 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
662 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
664 Vc[ggid] = Vc[ggid] + vctot;
665 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
666 ninner = ninner + nj1 - nj0;
669 nouter = nouter + nn1 - nn0;