2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Thresholds that weight_product is compared against in the kernels of
 * this file ("<" for ALMOST_ZERO, ">=" for ALMOST_ONE).
 * NOTE(review): ALMOST_ONE expands to 1-(1e-30) without enclosing
 * parentheses, so its value depends on the operators surrounding each use;
 * the uses visible here are plain ">=" comparisons. Confirm before reusing
 * the macro inside a larger arithmetic expression. */
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel400_adress.h"
32 * Gromacs nonbonded kernel nb_kernel400_adress_cg
33 * Coulomb interaction: Generalized-Born
34 * VdW interaction: Not calculated
35 * water optimization: No
36 * Calculate forces: yes
/*
 * nb_kernel400_adress_cg
 *
 * Loops over n in [nn0, nn1) and over k in [nj0, nj1), evaluates a pair
 * term that reads the GBtab table, and accumulates results into faction,
 * fshift, Vc and dvda. Each pair carries weight_product =
 * weight_cg1*weight_cg2; in the general case hybscal = 1.0 - weight_product.
 *
 * NOTE(review): the parameter list, the assignments of nn0/nj0/nj1/jnr and
 * the place where hybscal is applied are not part of this view; confirm
 * against the full generated file.
 */
38 void nb_kernel400_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
82 real Y,F,Geps,Heps2,Fp,VV;
85 real isai,isaj,isaprod,gbscale,vgb;
86 real dvdasum,dvdatmp,dvdaj,fgb;
87 real ix1,iy1,iz1,fix1,fiy1,fiz1;
89 real dx11,dy11,dz11,rsq11,rinv11;
90 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive through pointers and are copied into locals. */
95 nthreads = *p_nthreads;
99 tabscale = *p_tabscale;
100 gbtabscale = *p_gbtabscale;
106 #ifdef GMX_THREAD_SHM_FDECOMP
/* The upper bound nn1 is computed between a lock and an unlock of mtx:
 * nn0 plus the integer quotient (nri-nn0)/(2*nthreads), plus 10. */
107 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
109 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
111 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over the index range [nn0, nn1). */
118 for(n=nn0; (n<nn1); n++)
122 shY = shiftvec[is3+1];
123 shZ = shiftvec[is3+2];
/* pos[ii3..ii3+2] offset by the shift components gives ix1, iy1, iz1. */
128 ix1 = shX + pos[ii3+0];
129 iy1 = shY + pos[ii3+1];
130 iz1 = shZ + pos[ii3+2];
/* charge[ii] scaled by facel. */
131 iq = facel*charge[ii];
/* Loop over the index range [nj0, nj1). */
140 for(k=nj0; (k<nj1); k++)
/* Pair weight: product of weight_cg1 and the wf entry for jnr. */
143 weight_cg2 = wf[jnr];
144 weight_product = weight_cg1*weight_cg2;
/* Three-way split on weight_product. The statements of the first two
 * branches are not visible here; the existing comment marks the
 * ">= ALMOST_ONE" branch as the one that skips the pair. */
145 if (weight_product < ALMOST_ZERO) {
148 else if (weight_product >= ALMOST_ONE)
150 /* force is zero, skip this molecule */
155 hybscal = 1.0 - weight_product;
/* rsq11 is the sum of squares of dx11, dy11, dz11; rinv11 is its inverse
 * square root. */
164 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
165 rinv11 = 1.0/sqrt(rsq11);
166 isaj = invsqrta[jnr];
170 fscal = vcoul*rinv11;
172 gbscale = isaprod*gbtabscale;
/* Table terms: entries nnn+2 and nnn+3 of GBtab scaled by eps and eps2,
 * combined with Fp into FF. */
182 Geps = eps*GBtab[nnn+2];
183 Heps2 = eps2*GBtab[nnn+3];
186 FF = Fp+Geps+2.0*Heps2;
188 fijC = qq*FF*gbscale;
/* dvdatmp is added to the running dvdasum and, scaled by isaj*isaj,
 * written into dvda[jnr] on top of dvdaj. */
189 dvdatmp = -0.5*(vgb+fijC*r);
190 dvdasum = dvdasum + dvdatmp;
191 dvda[jnr] = dvdaj+dvdatmp*isaj*isaj;
192 vctot = vctot + vcoul;
/* Final fscal combines fijC with the earlier fscal = vcoul*rinv11. */
193 fscal = -(fijC-fscal)*rinv11;
/* tx, ty, tz are subtracted from the faction entry at j3. */
201 faction[j3+0] = faction[j3+0] - tx;
202 faction[j3+1] = faction[j3+1] - ty;
203 faction[j3+2] = faction[j3+2] - tz;
/* fix1, fiy1, fiz1 are added to the faction entry at ii3 and to the
 * fshift entry at is3. */
206 faction[ii3+0] = faction[ii3+0] + fix1;
207 faction[ii3+1] = faction[ii3+1] + fiy1;
208 faction[ii3+2] = faction[ii3+2] + fiz1;
209 fshift[is3] = fshift[is3]+fix1;
210 fshift[is3+1] = fshift[is3+1]+fiy1;
211 fshift[is3+2] = fshift[is3+2]+fiz1;
/* Accumulate vctot into Vc[ggid] and dvdasum, scaled by isai*isai, into
 * dvda[ii]. */
213 Vc[ggid] = Vc[ggid] + vctot;
214 dvda[ii] = dvda[ii] + dvdasum*isai*isai;
/* Running counts of the k and n ranges that were traversed. */
215 ninner = ninner + nj1 - nj0;
218 nouter = nouter + nn1 - nn0;
231 * Gromacs nonbonded kernel nb_kernel400_adress_ex
232 * Coulomb interaction: Generalized-Born
233 * VdW interaction: Not calculated
234 * water optimization: No
235 * Calculate forces: yes
/*
 * nb_kernel400_adress_ex
 *
 * Loops over n in [nn0, nn1) and over k in [nj0, nj1), evaluates a pair
 * term that reads the GBtab table, and accumulates results into faction,
 * fshift, Vc and dvda. Each pair carries weight_product =
 * weight_cg1*weight_cg2; in the general case hybscal = weight_product.
 *
 * NOTE(review): the parameter list, the assignments of nn0/nj0/nj1/jnr and
 * the place where hybscal is applied are not part of this view; confirm
 * against the full generated file.
 */
237 void nb_kernel400_adress_ex(
271 int nri,ntype,nthreads;
272 real facel,krf,crf,tabscale,gbtabscale;
273 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
274 int nn0,nn1,nouter,ninner;
281 real Y,F,Geps,Heps2,Fp,VV;
284 real isai,isaj,isaprod,gbscale,vgb;
285 real dvdasum,dvdatmp,dvdaj,fgb;
286 real ix1,iy1,iz1,fix1,fiy1,fiz1;
288 real dx11,dy11,dz11,rsq11,rinv11;
289 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive through pointers and are copied into locals. */
294 nthreads = *p_nthreads;
298 tabscale = *p_tabscale;
299 gbtabscale = *p_gbtabscale;
305 #ifdef GMX_THREAD_SHM_FDECOMP
/* The upper bound nn1 is computed between a lock and an unlock of mtx:
 * nn0 plus the integer quotient (nri-nn0)/(2*nthreads), plus 10. */
306 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
308 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
310 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over the index range [nn0, nn1). */
317 for(n=nn0; (n<nn1); n++)
321 shY = shiftvec[is3+1];
322 shZ = shiftvec[is3+2];
/* pos[ii3..ii3+2] offset by the shift components gives ix1, iy1, iz1. */
327 ix1 = shX + pos[ii3+0];
328 iy1 = shY + pos[ii3+1];
329 iz1 = shZ + pos[ii3+2];
/* charge[ii] scaled by facel. */
330 iq = facel*charge[ii];
/* Loop over the index range [nj0, nj1). */
339 for(k=nj0; (k<nj1); k++)
/* Pair weight: product of weight_cg1 and the wf entry for jnr. */
342 weight_cg2 = wf[jnr];
343 weight_product = weight_cg1*weight_cg2;
/* Three-way split on weight_product. The existing comment marks the
 * "< ALMOST_ZERO" branch as the one that skips the pair; the statements of
 * the first two branches are not visible here. */
344 if (weight_product < ALMOST_ZERO) {
345 /* force is zero, skip this molecule */
348 else if (weight_product >= ALMOST_ONE)
354 hybscal = weight_product;
/* rsq11 is the sum of squares of dx11, dy11, dz11; rinv11 is its inverse
 * square root. */
363 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
364 rinv11 = 1.0/sqrt(rsq11);
365 isaj = invsqrta[jnr];
369 fscal = vcoul*rinv11;
371 gbscale = isaprod*gbtabscale;
/* Table terms: entries nnn+2 and nnn+3 of GBtab scaled by eps and eps2,
 * combined with Fp into FF. */
381 Geps = eps*GBtab[nnn+2];
382 Heps2 = eps2*GBtab[nnn+3];
385 FF = Fp+Geps+2.0*Heps2;
387 fijC = qq*FF*gbscale;
/* dvdatmp is added to the running dvdasum and, scaled by isaj*isaj,
 * written into dvda[jnr] on top of dvdaj. */
388 dvdatmp = -0.5*(vgb+fijC*r);
389 dvdasum = dvdasum + dvdatmp;
390 dvda[jnr] = dvdaj+dvdatmp*isaj*isaj;
391 vctot = vctot + vcoul;
/* Final fscal combines fijC with the earlier fscal = vcoul*rinv11. */
392 fscal = -(fijC-fscal)*rinv11;
/* tx, ty, tz are subtracted from the faction entry at j3. */
400 faction[j3+0] = faction[j3+0] - tx;
401 faction[j3+1] = faction[j3+1] - ty;
402 faction[j3+2] = faction[j3+2] - tz;
/* fix1, fiy1, fiz1 are added to the faction entry at ii3 and to the
 * fshift entry at is3. */
405 faction[ii3+0] = faction[ii3+0] + fix1;
406 faction[ii3+1] = faction[ii3+1] + fiy1;
407 faction[ii3+2] = faction[ii3+2] + fiz1;
408 fshift[is3] = fshift[is3]+fix1;
409 fshift[is3+1] = fshift[is3+1]+fiy1;
410 fshift[is3+2] = fshift[is3+2]+fiz1;
/* Accumulate vctot into Vc[ggid] and dvdasum, scaled by isai*isai, into
 * dvda[ii]. */
412 Vc[ggid] = Vc[ggid] + vctot;
413 dvda[ii] = dvda[ii] + dvdasum*isai*isai;
/* Running counts of the k and n ranges that were traversed. */
414 ninner = ninner + nj1 - nj0;
417 nouter = nouter + nn1 - nn0;