2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Lower cutoff that the kernels compare weight_product against. */
#define ALMOST_ZERO 1e-30
/*
 * Upper cutoff that the kernels compare weight_product against.
 * The expansion is parenthesized so that it behaves as a single operand
 * when the macro is used inside a larger expression.
 * NOTE(review): in IEEE-754 double arithmetic 1-(1e-30) rounds to 1.0, so
 * the comparison is effectively against 1.0 - confirm that is intended.
 */
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel420_adress.h"
32 * Gromacs nonbonded kernel nb_kernel420_adress_cg
33 * Coulomb interaction: Generalized-Born
34 * VdW interaction: Buckingham
35 * water optimization: No
36 * Calculate forces: yes
/*
 * nb_kernel420_adress_cg
 *
 * The header text in front of this definition labels it as the
 * Generalized-Born Coulomb / Buckingham VdW kernel with force calculation
 * and no water optimization. The visible statements accumulate pair forces
 * into faction and fshift, energies into Vc and Vvdw, and update dvda.
 * In this variant the pair scaling factor is hybscal = 1.0 - weight_product.
 *
 * NOTE(review): the parameter list and parts of the body are not visible
 * in this excerpt; the comments below describe only the lines shown.
 */
38 void nb_kernel420_adress_cg(
/* Scalar copies of inputs, loop counters and index temporaries. */
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Temporaries for the GBtab terms combined further down (Geps, Heps2, Fp). */
87 real Y,F,Geps,Heps2,Fp,VV;
/* Values read from invsqrta and the dvda accumulation temporaries. */
90 real isai,isaj,isaprod,gbscale,vgb;
91 real dvdasum,dvdatmp,dvdaj,fgb;
/* Shifted i-atom position and the force sum collected for the i atom. */
93 real ix1,iy1,iz1,fix1,fiy1,fiz1;
/* Pair displacement, squared distance and inverse distance. */
95 real dx11,dy11,dz11,rsq11,rinv11;
/* Per-atom weights read from wf and their product for the current pair. */
97 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are dereferenced once here. */
102 nthreads = *p_nthreads;
106 tabscale = *p_tabscale;
107 gbtabscale = *p_gbtabscale;
/* With GMX_THREAD_SHM_FDECOMP the chunk bound is computed while holding mtx. */
113 #ifdef GMX_THREAD_SHM_FDECOMP
114 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* Chunk end: nn0 plus (nri-nn0)/(2*nthreads) (integer division) plus 10. */
116 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
118 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over list entries n in [nn0, nn1). */
125 for(n=nn0; (n<nn1); n++)
/* Shift vector components for this entry, read at offset is3. */
129 shY = shiftvec[is3+1];
130 shZ = shiftvec[is3+2];
/* i-atom coordinates with the shift applied. */
135 ix1 = shX + pos[ii3+0];
136 iy1 = shY + pos[ii3+1];
137 iz1 = shZ + pos[ii3+2];
/* i-atom charge pre-multiplied by facel. */
138 iq = facel*charge[ii];
/* Base offset into vdwparam for the i-atom type; entries are 3 values wide. */
140 nti = 3*ntype*type[ii];
/* Inner loop over neighbours k in [nj0, nj1). */
149 for(k=nj0; (k<nj1); k++)
/* Weight of the j atom and the combined pair weight. */
152 weight_cg2 = wf[jnr];
153 weight_product = weight_cg1*weight_cg2;
/* Pair weight below the lower cutoff (branch body not visible here). */
154 if (weight_product < ALMOST_ZERO) {
/* Pair weight at or above the upper cutoff. */
157 else if (weight_product >= ALMOST_ONE)
159 /* force is zero, skip this molecule */
/* Scaling factor for this variant: the complement of the pair weight. */
164 hybscal = 1.0 - weight_product;
/* Squared pair distance and its inverse square root. */
173 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
174 rinv11 = 1.0/sqrt(rsq11);
175 isaj = invsqrta[jnr];
179 fscal = vcoul*rinv11;
/* Scale factor built from isaprod and the GB table scale. */
181 gbscale = isaprod*gbtabscale;
/* Offset of this type pair in vdwparam; entries +1 and +2 are read below. */
182 tj = nti+3*type[jnr];
184 cexp1 = vdwparam[tj+1];
185 cexp2 = vdwparam[tj+2];
186 rinvsq = rinv11*rinv11;
/* Terms from GBtab entries nnn+2 and nnn+3, scaled by eps and eps2. */
196 Geps = eps*GBtab[nnn+2];
197 Heps2 = eps2*GBtab[nnn+3];
/* FF combines Fp with the two table terms above. */
200 FF = Fp+Geps+2.0*Heps2;
202 fijC = qq*FF*gbscale;
/* dvda contribution of this pair: added to the i-atom sum and stored for j. */
203 dvdatmp = -0.5*(vgb+fijC*r);
204 dvdasum = dvdasum + dvdatmp;
205 dvda[jnr] = dvdaj+dvdatmp*isaj*isaj;
206 vctot = vctot + vcoul;
/* Inverse sixth power of the distance. */
207 rinvsix = rinvsq*rinvsq*rinvsq;
/* rsq11*rinv11 is the distance itself, so br is cexp2 times the distance. */
209 br = cexp2*rsq11*rinv11;
/* Exponential term of the VdW energy. */
210 Vvdwexp = cexp1*exp(-br);
211 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
/* Scalar force factor combining the VdW terms with fijC and the earlier fscal. */
212 fscal = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv11;
/* Subtract the pair force components from the j atom. */
220 faction[j3+0] = faction[j3+0] - tx;
221 faction[j3+1] = faction[j3+1] - ty;
222 faction[j3+2] = faction[j3+2] - tz;
/* Add the force sum to the i atom and to the shift-force entry at is3. */
225 faction[ii3+0] = faction[ii3+0] + fix1;
226 faction[ii3+1] = faction[ii3+1] + fiy1;
227 faction[ii3+2] = faction[ii3+2] + fiz1;
228 fshift[is3] = fshift[is3]+fix1;
229 fshift[is3+1] = fshift[is3+1]+fiy1;
230 fshift[is3+2] = fshift[is3+2]+fiz1;
/* Add the energy sums to the entries selected by ggid. */
232 Vc[ggid] = Vc[ggid] + vctot;
233 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* i-atom dvda update: the collected sum scaled by isai squared. */
234 dvda[ii] = dvda[ii] + dvdasum*isai*isai;
/* Iteration counters: neighbours in this entry, then entries in this chunk. */
235 ninner = ninner + nj1 - nj0;
238 nouter = nouter + nn1 - nn0;
251 * Gromacs nonbonded kernel nb_kernel420_adress_ex
252 * Coulomb interaction: Generalized-Born
253 * VdW interaction: Buckingham
254 * water optimization: No
255 * Calculate forces: yes
/*
 * nb_kernel420_adress_ex
 *
 * The header text in front of this definition labels it as the
 * Generalized-Born Coulomb / Buckingham VdW kernel with force calculation
 * and no water optimization. The visible statements accumulate pair forces
 * into faction and fshift, energies into Vc and Vvdw, and update dvda.
 * In this variant the pair scaling factor is hybscal = weight_product, and
 * the scalar force factor is limited in magnitude when force_cap is positive.
 *
 * NOTE(review): the parameter list and parts of the body are not visible
 * in this excerpt; the comments below describe only the lines shown.
 */
257 void nb_kernel420_adress_ex(
/* Scalar copies of inputs, loop counters and index temporaries. */
291 int nri,ntype,nthreads;
292 real facel,krf,crf,tabscale,gbtabscale;
293 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
294 int nn0,nn1,nouter,ninner;
/* Temporaries for the GBtab terms combined further down (Geps, Heps2, Fp). */
306 real Y,F,Geps,Heps2,Fp,VV;
/* Values read from invsqrta and the dvda accumulation temporaries. */
309 real isai,isaj,isaprod,gbscale,vgb;
310 real dvdasum,dvdatmp,dvdaj,fgb;
/* Shifted i-atom position and the force sum collected for the i atom. */
312 real ix1,iy1,iz1,fix1,fiy1,fiz1;
/* Pair displacement, squared distance and inverse distance. */
314 real dx11,dy11,dz11,rsq11,rinv11;
/* Per-atom weights read from wf and their product for the current pair. */
316 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are dereferenced once here. */
321 nthreads = *p_nthreads;
325 tabscale = *p_tabscale;
326 gbtabscale = *p_gbtabscale;
/* With GMX_THREAD_SHM_FDECOMP the chunk bound is computed while holding mtx. */
332 #ifdef GMX_THREAD_SHM_FDECOMP
333 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* Chunk end: nn0 plus (nri-nn0)/(2*nthreads) (integer division) plus 10. */
335 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
337 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over list entries n in [nn0, nn1). */
344 for(n=nn0; (n<nn1); n++)
/* Shift vector components for this entry, read at offset is3. */
348 shY = shiftvec[is3+1];
349 shZ = shiftvec[is3+2];
/* i-atom coordinates with the shift applied. */
354 ix1 = shX + pos[ii3+0];
355 iy1 = shY + pos[ii3+1];
356 iz1 = shZ + pos[ii3+2];
/* i-atom charge pre-multiplied by facel. */
357 iq = facel*charge[ii];
/* Base offset into vdwparam for the i-atom type; entries are 3 values wide. */
359 nti = 3*ntype*type[ii];
/* Inner loop over neighbours k in [nj0, nj1). */
368 for(k=nj0; (k<nj1); k++)
/* Weight of the j atom and the combined pair weight. */
371 weight_cg2 = wf[jnr];
372 weight_product = weight_cg1*weight_cg2;
/* Pair weight below the lower cutoff. */
373 if (weight_product < ALMOST_ZERO) {
374 /* force is zero, skip this molecule */
/* Pair weight at or above the upper cutoff (branch body not visible here). */
377 else if (weight_product >= ALMOST_ONE)
/* Scaling factor for this variant: the pair weight itself. */
383 hybscal = weight_product;
/* Squared pair distance and its inverse square root. */
392 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
393 rinv11 = 1.0/sqrt(rsq11);
394 isaj = invsqrta[jnr];
398 fscal = vcoul*rinv11;
/* Scale factor built from isaprod and the GB table scale. */
400 gbscale = isaprod*gbtabscale;
/* Offset of this type pair in vdwparam; entries +1 and +2 are read below. */
401 tj = nti+3*type[jnr];
403 cexp1 = vdwparam[tj+1];
404 cexp2 = vdwparam[tj+2];
405 rinvsq = rinv11*rinv11;
/* Terms from GBtab entries nnn+2 and nnn+3, scaled by eps and eps2. */
415 Geps = eps*GBtab[nnn+2];
416 Heps2 = eps2*GBtab[nnn+3];
/* FF combines Fp with the two table terms above. */
419 FF = Fp+Geps+2.0*Heps2;
421 fijC = qq*FF*gbscale;
/* dvda contribution of this pair: added to the i-atom sum and stored for j. */
422 dvdatmp = -0.5*(vgb+fijC*r);
423 dvdasum = dvdasum + dvdatmp;
424 dvda[jnr] = dvdaj+dvdatmp*isaj*isaj;
425 vctot = vctot + vcoul;
/* Inverse sixth power of the distance. */
426 rinvsix = rinvsq*rinvsq*rinvsq;
/* rsq11*rinv11 is the distance itself, so br is cexp2 times the distance. */
428 br = cexp2*rsq11*rinv11;
/* Exponential term of the VdW energy. */
429 Vvdwexp = cexp1*exp(-br);
430 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
/* Scalar force factor combining the VdW terms with fijC and the earlier fscal. */
431 fscal = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv11;
/* When force_cap is positive, limit |fscal| to force_cap and keep its sign. */
433 if(force_cap>0 && (fabs(fscal)> force_cap)){
434 fscal=force_cap*fscal/fabs(fscal);
/* Subtract the pair force components from the j atom. */
442 faction[j3+0] = faction[j3+0] - tx;
443 faction[j3+1] = faction[j3+1] - ty;
444 faction[j3+2] = faction[j3+2] - tz;
/* Add the force sum to the i atom and to the shift-force entry at is3. */
447 faction[ii3+0] = faction[ii3+0] + fix1;
448 faction[ii3+1] = faction[ii3+1] + fiy1;
449 faction[ii3+2] = faction[ii3+2] + fiz1;
450 fshift[is3] = fshift[is3]+fix1;
451 fshift[is3+1] = fshift[is3+1]+fiy1;
452 fshift[is3+2] = fshift[is3+2]+fiz1;
/* Add the energy sums to the entries selected by ggid. */
454 Vc[ggid] = Vc[ggid] + vctot;
455 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* i-atom dvda update: the collected sum scaled by isai squared. */
456 dvda[ii] = dvda[ii] + dvdasum*isai*isai;
/* Iteration counters: neighbours in this entry, then entries in this chunk. */
457 ninner = ninner + nj1 - nj0;
460 nouter = nouter + nn1 - nn0;