2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Lower threshold that the AdResS weight product is compared against. */
#define ALMOST_ZERO 1e-30
/*
 * Upper threshold that the AdResS weight product is compared against.
 * The replacement list is parenthesized so the macro expands as a single
 * operand inside larger expressions (e.g. 2*ALMOST_ONE or -ALMOST_ONE).
 * NOTE(review): 1e-30 is far below floating-point epsilon, so this value
 * rounds to exactly 1.0 -- confirm that this is the intended threshold.
 */
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel430_adress.h"
32 * Gromacs nonbonded kernel nb_kernel430_adress_cg
33 * Coulomb interaction: Generalized-Born
34 * VdW interaction: Tabulated
35 * water optimization: No
36 * Calculate forces: yes
/*
 * "cg" AdResS flavour of nonbonded kernel 430.
 * NOTE(review): only part of this function is visible in this listing (the
 * parameter list, braces and many statements are elided); the comments
 * below describe only the lines that are shown.
 */
38 void nb_kernel430_adress_cg(
/* Local working variables. */
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
86 real Y,F,Geps,Heps2,Fp,VV;
90 real isai,isaj,isaprod,gbscale,vgb;
91 real dvdasum,dvdatmp,dvdaj,fgb;
92 real ix1,iy1,iz1,fix1,fiy1,fiz1;
94 real dx11,dy11,dz11,rsq11,rinv11;
96 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar inputs that are passed by pointer into locals. */
101 nthreads = *p_nthreads;
105 tabscale = *p_tabscale;
106 gbtabscale = *p_gbtabscale;
/*
 * With GMX_THREAD_SHM_FDECOMP defined, the upper bound nn1 of the outer
 * index range is derived from nn0, nri and nthreads between a lock and an
 * unlock of the mutex mtx.
 */
112 #ifdef GMX_THREAD_SHM_FDECOMP
113 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
115 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
117 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over indices nn0 .. nn1-1. */
124 for(n=nn0; (n<nn1); n++)
/* y and z components of the shift vector stored at offset is3. */
128 shY = shiftvec[is3+1];
129 shZ = shiftvec[is3+2];
/* Shifted coordinates: shift vector plus pos[ii3 .. ii3+2]. */
134 ix1 = shX + pos[ii3+0];
135 iy1 = shY + pos[ii3+1];
136 iz1 = shZ + pos[ii3+2];
/* Charge of entry ii scaled by facel. */
137 iq = facel*charge[ii];
/* Offset derived from type[ii]; combined with type[jnr] below to index vdwparam. */
139 nti = 2*ntype*type[ii];
/* Inner loop over k in [nj0, nj1). */
148 for(k=nj0; (k<nj1); k++)
/* Weight of jnr from wf, multiplied with weight_cg1 to form the weight product. */
151 weight_cg2 = wf[jnr];
152 weight_product = weight_cg1*weight_cg2;
/* Branch on the weight product against the ALMOST_ZERO / ALMOST_ONE thresholds. */
153 if (weight_product < ALMOST_ZERO) {
156 else if (weight_product >= ALMOST_ONE)
158 /* force is zero, skip this molecule */
/* In this (cg) variant the scaling factor is one minus the weight product. */
163 hybscal = 1.0 - weight_product;
/* Squared length of (dx11,dy11,dz11) and its reciprocal square root. */
172 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
173 rinv11 = 1.0/sqrt(rsq11);
174 isaj = invsqrta[jnr];
178 fscal = vcoul*rinv11;
/* Scale applied to the GB table term, from isaprod and gbtabscale. */
180 gbscale = isaprod*gbtabscale;
/* Index into vdwparam built from nti and type[jnr]; c12 is read at tj+1. */
181 tj = nti+2*type[jnr];
183 c12 = vdwparam[tj+1];
/*
 * Terms read from GBtab at offsets nnn+2 and nnn+3 are combined with Fp
 * into FF.
 * NOTE(review): looks like a table-interpolation step; the lines that set
 * nnn, eps, eps2 and Fp are not visible here -- confirm.
 */
193 Geps = eps*GBtab[nnn+2];
194 Heps2 = eps2*GBtab[nnn+3];
197 FF = Fp+Geps+2.0*Heps2;
199 fijC = qq*FF*gbscale;
/* dvdatmp is added to the running sum dvdasum and, scaled by isaj*isaj, stored in dvda[jnr]. */
200 dvdatmp = -0.5*(vgb+fijC*r);
201 dvdasum = dvdasum + dvdatmp;
202 dvda[jnr] = dvdaj+dvdatmp*isaj*isaj;
/* Accumulate vcoul into vctot. */
203 vctot = vctot + vcoul;
/*
 * Two lookups in VFtab follow, each combining offsets nnn+2 and nnn+3
 * with Fp into FF.
 * NOTE(review): presumably the two VdW table terms that yield Vvdw6 and
 * Vvdw12 -- the intermediate lines are not visible; confirm.
 */
212 Geps = eps*VFtab[nnn+2];
213 Heps2 = eps2*VFtab[nnn+3];
216 FF = Fp+Geps+2.0*Heps2;
222 Geps = eps*VFtab[nnn+2];
223 Heps2 = eps2*VFtab[nnn+3];
226 FF = Fp+Geps+2.0*Heps2;
/* Accumulate both VdW terms into Vvdwtot. */
229 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
/* Combine the table terms fijD, fijR, fijC with the earlier fscal value and scale by rinv11. */
230 fscal = -((fijD+fijR)*tabscale+fijC-fscal)*rinv11;
/* Subtract (tx,ty,tz) from the force entries at j3. */
238 faction[j3+0] = faction[j3+0] - tx;
239 faction[j3+1] = faction[j3+1] - ty;
240 faction[j3+2] = faction[j3+2] - tz;
/* Add (fix1,fiy1,fiz1) to the force entries at ii3 and to the shift-force entries at is3. */
243 faction[ii3+0] = faction[ii3+0] + fix1;
244 faction[ii3+1] = faction[ii3+1] + fiy1;
245 faction[ii3+2] = faction[ii3+2] + fiz1;
246 fshift[is3] = fshift[is3]+fix1;
247 fshift[is3+1] = fshift[is3+1]+fiy1;
248 fshift[is3+2] = fshift[is3+2]+fiz1;
/* Add the accumulated totals to the Vc / Vvdw entries at index ggid. */
250 Vc[ggid] = Vc[ggid] + vctot;
251 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Add dvdasum, scaled by isai*isai, to dvda[ii]. */
252 dvda[ii] = dvda[ii] + dvdasum*isai*isai;
/* Iteration counters: inner count grows by nj1-nj0, outer count by nn1-nn0. */
253 ninner = ninner + nj1 - nj0;
256 nouter = nouter + nn1 - nn0;
269 * Gromacs nonbonded kernel nb_kernel430_adress_ex
270 * Coulomb interaction: Generalized-Born
271 * VdW interaction: Tabulated
272 * water optimization: No
273 * Calculate forces: yes
/*
 * "ex" AdResS flavour of nonbonded kernel 430.
 * NOTE(review): only part of this function is visible in this listing (the
 * parameter list, braces and many statements are elided); the comments
 * below describe only the lines that are shown.
 */
275 void nb_kernel430_adress_ex(
/* Local working variables. */
309 int nri,ntype,nthreads;
310 real facel,krf,crf,tabscale,gbtabscale;
311 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
312 int nn0,nn1,nouter,ninner;
323 real Y,F,Geps,Heps2,Fp,VV;
327 real isai,isaj,isaprod,gbscale,vgb;
328 real dvdasum,dvdatmp,dvdaj,fgb;
329 real ix1,iy1,iz1,fix1,fiy1,fiz1;
331 real dx11,dy11,dz11,rsq11,rinv11;
333 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar inputs that are passed by pointer into locals. */
338 nthreads = *p_nthreads;
342 tabscale = *p_tabscale;
343 gbtabscale = *p_gbtabscale;
/*
 * With GMX_THREAD_SHM_FDECOMP defined, the upper bound nn1 of the outer
 * index range is derived from nn0, nri and nthreads between a lock and an
 * unlock of the mutex mtx.
 */
349 #ifdef GMX_THREAD_SHM_FDECOMP
350 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
352 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
354 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over indices nn0 .. nn1-1. */
361 for(n=nn0; (n<nn1); n++)
/* y and z components of the shift vector stored at offset is3. */
365 shY = shiftvec[is3+1];
366 shZ = shiftvec[is3+2];
/* Shifted coordinates: shift vector plus pos[ii3 .. ii3+2]. */
371 ix1 = shX + pos[ii3+0];
372 iy1 = shY + pos[ii3+1];
373 iz1 = shZ + pos[ii3+2];
/* Charge of entry ii scaled by facel. */
374 iq = facel*charge[ii];
/* Offset derived from type[ii]; combined with type[jnr] below to index vdwparam. */
376 nti = 2*ntype*type[ii];
/* Inner loop over k in [nj0, nj1). */
385 for(k=nj0; (k<nj1); k++)
/* Weight of jnr from wf, multiplied with weight_cg1 to form the weight product. */
388 weight_cg2 = wf[jnr];
389 weight_product = weight_cg1*weight_cg2;
/* In this (ex) variant the skip case is a weight product below ALMOST_ZERO. */
390 if (weight_product < ALMOST_ZERO) {
391 /* force is zero, skip this molecule */
394 else if (weight_product >= ALMOST_ONE)
/* In this (ex) variant the scaling factor is the weight product itself. */
400 hybscal = weight_product;
/* Squared length of (dx11,dy11,dz11) and its reciprocal square root. */
409 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
410 rinv11 = 1.0/sqrt(rsq11);
411 isaj = invsqrta[jnr];
415 fscal = vcoul*rinv11;
/* Scale applied to the GB table term, from isaprod and gbtabscale. */
417 gbscale = isaprod*gbtabscale;
/* Index into vdwparam built from nti and type[jnr]; c12 is read at tj+1. */
418 tj = nti+2*type[jnr];
420 c12 = vdwparam[tj+1];
/*
 * Terms read from GBtab at offsets nnn+2 and nnn+3 are combined with Fp
 * into FF.
 * NOTE(review): looks like a table-interpolation step; the lines that set
 * nnn, eps, eps2 and Fp are not visible here -- confirm.
 */
430 Geps = eps*GBtab[nnn+2];
431 Heps2 = eps2*GBtab[nnn+3];
434 FF = Fp+Geps+2.0*Heps2;
436 fijC = qq*FF*gbscale;
/* dvdatmp is added to the running sum dvdasum and, scaled by isaj*isaj, stored in dvda[jnr]. */
437 dvdatmp = -0.5*(vgb+fijC*r);
438 dvdasum = dvdasum + dvdatmp;
439 dvda[jnr] = dvdaj+dvdatmp*isaj*isaj;
/* Accumulate vcoul into vctot. */
440 vctot = vctot + vcoul;
/*
 * Two lookups in VFtab follow, each combining offsets nnn+2 and nnn+3
 * with Fp into FF.
 * NOTE(review): presumably the two VdW table terms that yield Vvdw6 and
 * Vvdw12 -- the intermediate lines are not visible; confirm.
 */
449 Geps = eps*VFtab[nnn+2];
450 Heps2 = eps2*VFtab[nnn+3];
453 FF = Fp+Geps+2.0*Heps2;
459 Geps = eps*VFtab[nnn+2];
460 Heps2 = eps2*VFtab[nnn+3];
463 FF = Fp+Geps+2.0*Heps2;
/* Accumulate both VdW terms into Vvdwtot. */
466 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12;
/* Combine the table terms fijD, fijR, fijC with the earlier fscal value and scale by rinv11. */
467 fscal = -((fijD+fijR)*tabscale+fijC-fscal)*rinv11;
/*
 * Force capping: when force_cap is positive and |fscal| exceeds it, fscal
 * is replaced by force_cap carrying the sign of the original fscal.
 */
469 if(force_cap>0 && (fabs(fscal)> force_cap)){
470 fscal=force_cap*fscal/fabs(fscal);
/* Subtract (tx,ty,tz) from the force entries at j3. */
478 faction[j3+0] = faction[j3+0] - tx;
479 faction[j3+1] = faction[j3+1] - ty;
480 faction[j3+2] = faction[j3+2] - tz;
/* Add (fix1,fiy1,fiz1) to the force entries at ii3 and to the shift-force entries at is3. */
483 faction[ii3+0] = faction[ii3+0] + fix1;
484 faction[ii3+1] = faction[ii3+1] + fiy1;
485 faction[ii3+2] = faction[ii3+2] + fiz1;
486 fshift[is3] = fshift[is3]+fix1;
487 fshift[is3+1] = fshift[is3+1]+fiy1;
488 fshift[is3+2] = fshift[is3+2]+fiz1;
/* Add the accumulated totals to the Vc / Vvdw entries at index ggid. */
490 Vc[ggid] = Vc[ggid] + vctot;
491 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Add dvdasum, scaled by isai*isai, to dvda[ii]. */
492 dvda[ii] = dvda[ii] + dvdasum*isai*isai;
/* Iteration counters: inner count grows by nj1-nj0, outer count by nn1-nn0. */
493 ninner = ninner + nj1 - nj0;
496 nouter = nouter + nn1 - nn0;