2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generation this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel233_adress.h"
32 * Gromacs nonbonded kernel nb_kernel233_adress_cg
33 * Coulomb interaction: Reaction field
34 * VdW interaction: Tabulated
35 * water optimization: TIP4P - other atoms
36 * Calculate forces: yes
38 void nb_kernel233_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
87 real Y,F,Geps,Heps2,Fp,VV;
91 real ix1,iy1,iz1,fix1,fiy1,fiz1;
92 real ix2,iy2,iz2,fix2,fiy2,fiz2;
93 real ix3,iy3,iz3,fix3,fiy3,fiz3;
94 real ix4,iy4,iz4,fix4,fiy4,fiz4;
95 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
96 real dx11,dy11,dz11,rsq11,rinv11;
97 real dx21,dy21,dz21,rsq21,rinv21;
98 real dx31,dy31,dz31,rsq31,rinv31;
99 real dx41,dy41,dz41,rsq41,rinv41;
102 real weight_cg1, weight_cg2, weight_product;
107 nthreads = *p_nthreads;
111 tabscale = *p_tabscale;
113 qH = facel*charge[ii+1];
114 qM = facel*charge[ii+3];
115 nti = 2*ntype*type[ii];
122 #ifdef GMX_THREAD_SHM_FDECOMP
123 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
125 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
127 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
134 for(n=nn0; (n<nn1); n++)
138 shY = shiftvec[is3+1];
139 shZ = shiftvec[is3+2];
144 ix1 = shX + pos[ii3+0];
145 iy1 = shY + pos[ii3+1];
146 iz1 = shZ + pos[ii3+2];
147 ix2 = shX + pos[ii3+3];
148 iy2 = shY + pos[ii3+4];
149 iz2 = shZ + pos[ii3+5];
150 ix3 = shX + pos[ii3+6];
151 iy3 = shY + pos[ii3+7];
152 iz3 = shZ + pos[ii3+8];
153 ix4 = shX + pos[ii3+9];
154 iy4 = shY + pos[ii3+10];
155 iz4 = shZ + pos[ii3+11];
172 for(k=nj0; (k<nj1); k++)
175 weight_cg2 = wf[jnr];
176 weight_product = weight_cg1*weight_cg2;
177 if (weight_product < ALMOST_ZERO) {
180 else if (weight_product >= ALMOST_ONE)
182 /* force is zero, skip this molecule */
187 hybscal = 1.0 - weight_product;
196 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
200 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
204 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
208 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
209 rinv11 = 1.0/sqrt(rsq11);
210 rinv21 = 1.0/sqrt(rsq21);
211 rinv31 = 1.0/sqrt(rsq31);
212 rinv41 = 1.0/sqrt(rsq41);
213 tj = nti+2*type[jnr];
215 c12 = vdwparam[tj+1];
224 Geps = eps*VFtab[nnn+2];
225 Heps2 = eps2*VFtab[nnn+3];
228 FF = Fp+Geps+2.0*Heps2;
234 Geps = eps*VFtab[nnn+2];
235 Heps2 = eps2*VFtab[nnn+3];
238 FF = Fp+Geps+2.0*Heps2;
241 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
242 fscal = -((fijD+fijR)*tabscale)*rinv11;
250 fjx1 = faction[j3+0] - tx;
251 fjy1 = faction[j3+1] - ty;
252 fjz1 = faction[j3+2] - tz;
255 rinvsq = rinv21*rinv21;
257 vcoul = qq*(rinv21+krsq-crf);
259 fscal = (qq*(rinv21-2.0*krsq))*rinvsq;
270 rinvsq = rinv31*rinv31;
272 vcoul = qq*(rinv31+krsq-crf);
274 fscal = (qq*(rinv31-2.0*krsq))*rinvsq;
286 rinvsq = rinv41*rinv41;
288 vcoul = qq*(rinv41+krsq-crf);
290 fscal = (qq*(rinv41-2.0*krsq))*rinvsq;
298 faction[j3+0] = fjx1 - tx;
299 faction[j3+1] = fjy1 - ty;
300 faction[j3+2] = fjz1 - tz;
303 faction[ii3+0] = faction[ii3+0] + fix1;
304 faction[ii3+1] = faction[ii3+1] + fiy1;
305 faction[ii3+2] = faction[ii3+2] + fiz1;
306 faction[ii3+3] = faction[ii3+3] + fix2;
307 faction[ii3+4] = faction[ii3+4] + fiy2;
308 faction[ii3+5] = faction[ii3+5] + fiz2;
309 faction[ii3+6] = faction[ii3+6] + fix3;
310 faction[ii3+7] = faction[ii3+7] + fiy3;
311 faction[ii3+8] = faction[ii3+8] + fiz3;
312 faction[ii3+9] = faction[ii3+9] + fix4;
313 faction[ii3+10] = faction[ii3+10] + fiy4;
314 faction[ii3+11] = faction[ii3+11] + fiz4;
315 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
316 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
317 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
319 Vc[ggid] = Vc[ggid] + vctot;
320 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
321 ninner = ninner + nj1 - nj0;
324 nouter = nouter + nn1 - nn0;
337 * Gromacs nonbonded kernel nb_kernel233_adress_ex
338 * Coulomb interaction: Reaction field
339 * VdW interaction: Tabulated
340 * water optimization: TIP4P - other atoms
341 * Calculate forces: yes
343 void nb_kernel233_adress_ex(
377 int nri,ntype,nthreads;
378 real facel,krf,crf,tabscale,gbtabscale;
379 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
380 int nn0,nn1,nouter,ninner;
392 real Y,F,Geps,Heps2,Fp,VV;
396 real ix1,iy1,iz1,fix1,fiy1,fiz1;
397 real ix2,iy2,iz2,fix2,fiy2,fiz2;
398 real ix3,iy3,iz3,fix3,fiy3,fiz3;
399 real ix4,iy4,iz4,fix4,fiy4,fiz4;
400 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
401 real dx11,dy11,dz11,rsq11,rinv11;
402 real dx21,dy21,dz21,rsq21,rinv21;
403 real dx31,dy31,dz31,rsq31,rinv31;
404 real dx41,dy41,dz41,rsq41,rinv41;
407 real weight_cg1, weight_cg2, weight_product;
412 nthreads = *p_nthreads;
416 tabscale = *p_tabscale;
418 qH = facel*charge[ii+1];
419 qM = facel*charge[ii+3];
420 nti = 2*ntype*type[ii];
427 #ifdef GMX_THREAD_SHM_FDECOMP
428 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
430 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
432 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
439 for(n=nn0; (n<nn1); n++)
443 shY = shiftvec[is3+1];
444 shZ = shiftvec[is3+2];
449 ix1 = shX + pos[ii3+0];
450 iy1 = shY + pos[ii3+1];
451 iz1 = shZ + pos[ii3+2];
452 ix2 = shX + pos[ii3+3];
453 iy2 = shY + pos[ii3+4];
454 iz2 = shZ + pos[ii3+5];
455 ix3 = shX + pos[ii3+6];
456 iy3 = shY + pos[ii3+7];
457 iz3 = shZ + pos[ii3+8];
458 ix4 = shX + pos[ii3+9];
459 iy4 = shY + pos[ii3+10];
460 iz4 = shZ + pos[ii3+11];
477 for(k=nj0; (k<nj1); k++)
480 weight_cg2 = wf[jnr];
481 weight_product = weight_cg1*weight_cg2;
482 if (weight_product < ALMOST_ZERO) {
483 /* force is zero, skip this molecule */
486 else if (weight_product >= ALMOST_ONE)
492 hybscal = weight_product;
501 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
505 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
509 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
513 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
514 rinv11 = 1.0/sqrt(rsq11);
515 rinv21 = 1.0/sqrt(rsq21);
516 rinv31 = 1.0/sqrt(rsq31);
517 rinv41 = 1.0/sqrt(rsq41);
518 tj = nti+2*type[jnr];
520 c12 = vdwparam[tj+1];
529 Geps = eps*VFtab[nnn+2];
530 Heps2 = eps2*VFtab[nnn+3];
533 FF = Fp+Geps+2.0*Heps2;
539 Geps = eps*VFtab[nnn+2];
540 Heps2 = eps2*VFtab[nnn+3];
543 FF = Fp+Geps+2.0*Heps2;
546 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
547 fscal = -((fijD+fijR)*tabscale)*rinv11;
549 if(force_cap>0 && (fabs(fscal)> force_cap)){
550 fscal=force_cap*fscal/fabs(fscal);
558 fjx1 = faction[j3+0] - tx;
559 fjy1 = faction[j3+1] - ty;
560 fjz1 = faction[j3+2] - tz;
563 rinvsq = rinv21*rinv21;
565 vcoul = qq*(rinv21+krsq-crf);
567 fscal = (qq*(rinv21-2.0*krsq))*rinvsq;
569 if(force_cap>0 && (fabs(fscal)> force_cap)){
570 fscal=force_cap*fscal/fabs(fscal);
581 rinvsq = rinv31*rinv31;
583 vcoul = qq*(rinv31+krsq-crf);
585 fscal = (qq*(rinv31-2.0*krsq))*rinvsq;
587 if(force_cap>0 && (fabs(fscal)> force_cap)){
588 fscal=force_cap*fscal/fabs(fscal);
600 rinvsq = rinv41*rinv41;
602 vcoul = qq*(rinv41+krsq-crf);
604 fscal = (qq*(rinv41-2.0*krsq))*rinvsq;
606 if(force_cap>0 && (fabs(fscal)> force_cap)){
607 fscal=force_cap*fscal/fabs(fscal);
615 faction[j3+0] = fjx1 - tx;
616 faction[j3+1] = fjy1 - ty;
617 faction[j3+2] = fjz1 - tz;
620 faction[ii3+0] = faction[ii3+0] + fix1;
621 faction[ii3+1] = faction[ii3+1] + fiy1;
622 faction[ii3+2] = faction[ii3+2] + fiz1;
623 faction[ii3+3] = faction[ii3+3] + fix2;
624 faction[ii3+4] = faction[ii3+4] + fiy2;
625 faction[ii3+5] = faction[ii3+5] + fiz2;
626 faction[ii3+6] = faction[ii3+6] + fix3;
627 faction[ii3+7] = faction[ii3+7] + fiy3;
628 faction[ii3+8] = faction[ii3+8] + fiz3;
629 faction[ii3+9] = faction[ii3+9] + fix4;
630 faction[ii3+10] = faction[ii3+10] + fiy4;
631 faction[ii3+11] = faction[ii3+11] + fiz4;
632 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
633 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
634 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
636 Vc[ggid] = Vc[ggid] + vctot;
637 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
638 ninner = ninner + nj1 - nj0;
641 nouter = nouter + nn1 - nn0;