2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Lower threshold for weight_product: both kernels in this file test
 * (weight_product < ALMOST_ZERO) before doing the pair interaction. */
23 #define ALMOST_ZERO 1e-30
/* Upper threshold for weight_product: both kernels in this file test
 * (weight_product >= ALMOST_ONE).
 * NOTE(review): the replacement list is not parenthesized. That is harmless
 * in the `>=` comparisons visible in this file, but the macro would bind
 * incorrectly next to `*`, `/` or unary operators - confirm there are no
 * such uses before relying on it elsewhere.
 * NOTE(review): with IEEE-754 double arithmetic 1-(1e-30) presumably rounds
 * to exactly 1.0, so the test is effectively (weight_product >= 1.0) -
 * confirm this is the intended behavior. */
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel123_adress.h"
32 * Gromacs nonbonded kernel nb_kernel123_adress_cg
33 * Coulomb interaction: Normal Coulomb
34 * VdW interaction: Buckingham
35 * water optimization: TIP4P - other atoms
36 * Calculate forces: yes
38 void nb_kernel123_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
86 real ix1,iy1,iz1,fix1,fiy1,fiz1;
87 real ix2,iy2,iz2,fix2,fiy2,fiz2;
88 real ix3,iy3,iz3,fix3,fiy3,fiz3;
89 real ix4,iy4,iz4,fix4,fiy4,fiz4;
90 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
91 real dx11,dy11,dz11,rsq11,rinv11;
92 real dx21,dy21,dz21,rsq21,rinv21;
93 real dx31,dy31,dz31,rsq31,rinv31;
94 real dx41,dy41,dz41,rsq41,rinv41;
97 real weight_cg1, weight_cg2, weight_product;
102 nthreads = *p_nthreads;
106 tabscale = *p_tabscale;
108 qH = facel*charge[ii+1];
109 qM = facel*charge[ii+3];
110 nti = 3*ntype*type[ii];
117 #ifdef GMX_THREAD_SHM_FDECOMP
118 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
120 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
122 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
129 for(n=nn0; (n<nn1); n++)
133 shY = shiftvec[is3+1];
134 shZ = shiftvec[is3+2];
139 ix1 = shX + pos[ii3+0];
140 iy1 = shY + pos[ii3+1];
141 iz1 = shZ + pos[ii3+2];
142 ix2 = shX + pos[ii3+3];
143 iy2 = shY + pos[ii3+4];
144 iz2 = shZ + pos[ii3+5];
145 ix3 = shX + pos[ii3+6];
146 iy3 = shY + pos[ii3+7];
147 iz3 = shZ + pos[ii3+8];
148 ix4 = shX + pos[ii3+9];
149 iy4 = shY + pos[ii3+10];
150 iz4 = shZ + pos[ii3+11];
167 for(k=nj0; (k<nj1); k++)
170 weight_cg2 = wf[jnr];
171 weight_product = weight_cg1*weight_cg2;
172 if (weight_product < ALMOST_ZERO) {
175 else if (weight_product >= ALMOST_ONE)
177 /* force is zero, skip this molecule */
182 hybscal = 1.0 - weight_product;
191 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
195 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
199 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
203 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
204 rinv11 = 1.0/sqrt(rsq11);
205 rinv21 = 1.0/sqrt(rsq21);
206 rinv31 = 1.0/sqrt(rsq31);
207 rinv41 = 1.0/sqrt(rsq41);
208 tj = nti+3*type[jnr];
210 cexp1 = vdwparam[tj+1];
211 cexp2 = vdwparam[tj+2];
212 rinvsq = rinv11*rinv11;
213 rinvsix = rinvsq*rinvsq*rinvsq;
215 br = cexp2*rsq11*rinv11;
216 Vvdwexp = cexp1*exp(-br);
217 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
218 fscal = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
226 fjx1 = faction[j3+0] - tx;
227 fjy1 = faction[j3+1] - ty;
228 fjz1 = faction[j3+2] - tz;
231 rinvsq = rinv21*rinv21;
234 fscal = (vcoul)*rinvsq;
245 rinvsq = rinv31*rinv31;
248 fscal = (vcoul)*rinvsq;
260 rinvsq = rinv41*rinv41;
263 fscal = (vcoul)*rinvsq;
271 faction[j3+0] = fjx1 - tx;
272 faction[j3+1] = fjy1 - ty;
273 faction[j3+2] = fjz1 - tz;
276 faction[ii3+0] = faction[ii3+0] + fix1;
277 faction[ii3+1] = faction[ii3+1] + fiy1;
278 faction[ii3+2] = faction[ii3+2] + fiz1;
279 faction[ii3+3] = faction[ii3+3] + fix2;
280 faction[ii3+4] = faction[ii3+4] + fiy2;
281 faction[ii3+5] = faction[ii3+5] + fiz2;
282 faction[ii3+6] = faction[ii3+6] + fix3;
283 faction[ii3+7] = faction[ii3+7] + fiy3;
284 faction[ii3+8] = faction[ii3+8] + fiz3;
285 faction[ii3+9] = faction[ii3+9] + fix4;
286 faction[ii3+10] = faction[ii3+10] + fiy4;
287 faction[ii3+11] = faction[ii3+11] + fiz4;
288 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
289 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
290 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
292 Vc[ggid] = Vc[ggid] + vctot;
293 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
294 ninner = ninner + nj1 - nj0;
297 nouter = nouter + nn1 - nn0;
310 * Gromacs nonbonded kernel nb_kernel123_adress_ex
311 * Coulomb interaction: Normal Coulomb
312 * VdW interaction: Buckingham
313 * water optimization: TIP4P - other atoms
314 * Calculate forces: yes
316 void nb_kernel123_adress_ex(
350 int nri,ntype,nthreads;
351 real facel,krf,crf,tabscale,gbtabscale;
352 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
353 int nn0,nn1,nouter,ninner;
364 real ix1,iy1,iz1,fix1,fiy1,fiz1;
365 real ix2,iy2,iz2,fix2,fiy2,fiz2;
366 real ix3,iy3,iz3,fix3,fiy3,fiz3;
367 real ix4,iy4,iz4,fix4,fiy4,fiz4;
368 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
369 real dx11,dy11,dz11,rsq11,rinv11;
370 real dx21,dy21,dz21,rsq21,rinv21;
371 real dx31,dy31,dz31,rsq31,rinv31;
372 real dx41,dy41,dz41,rsq41,rinv41;
375 real weight_cg1, weight_cg2, weight_product;
380 nthreads = *p_nthreads;
384 tabscale = *p_tabscale;
386 qH = facel*charge[ii+1];
387 qM = facel*charge[ii+3];
388 nti = 3*ntype*type[ii];
395 #ifdef GMX_THREAD_SHM_FDECOMP
396 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
398 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
400 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
407 for(n=nn0; (n<nn1); n++)
411 shY = shiftvec[is3+1];
412 shZ = shiftvec[is3+2];
417 ix1 = shX + pos[ii3+0];
418 iy1 = shY + pos[ii3+1];
419 iz1 = shZ + pos[ii3+2];
420 ix2 = shX + pos[ii3+3];
421 iy2 = shY + pos[ii3+4];
422 iz2 = shZ + pos[ii3+5];
423 ix3 = shX + pos[ii3+6];
424 iy3 = shY + pos[ii3+7];
425 iz3 = shZ + pos[ii3+8];
426 ix4 = shX + pos[ii3+9];
427 iy4 = shY + pos[ii3+10];
428 iz4 = shZ + pos[ii3+11];
445 for(k=nj0; (k<nj1); k++)
448 weight_cg2 = wf[jnr];
449 weight_product = weight_cg1*weight_cg2;
450 if (weight_product < ALMOST_ZERO) {
451 /* force is zero, skip this molecule */
454 else if (weight_product >= ALMOST_ONE)
460 hybscal = weight_product;
469 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
473 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
477 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
481 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41;
482 rinv11 = 1.0/sqrt(rsq11);
483 rinv21 = 1.0/sqrt(rsq21);
484 rinv31 = 1.0/sqrt(rsq31);
485 rinv41 = 1.0/sqrt(rsq41);
486 tj = nti+3*type[jnr];
488 cexp1 = vdwparam[tj+1];
489 cexp2 = vdwparam[tj+2];
490 rinvsq = rinv11*rinv11;
491 rinvsix = rinvsq*rinvsq*rinvsq;
493 br = cexp2*rsq11*rinv11;
494 Vvdwexp = cexp1*exp(-br);
495 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
496 fscal = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
498 if(force_cap>0 && (fabs(fscal)> force_cap)){
499 fscal=force_cap*fscal/fabs(fscal);
507 fjx1 = faction[j3+0] - tx;
508 fjy1 = faction[j3+1] - ty;
509 fjz1 = faction[j3+2] - tz;
512 rinvsq = rinv21*rinv21;
515 fscal = (vcoul)*rinvsq;
517 if(force_cap>0 && (fabs(fscal)> force_cap)){
518 fscal=force_cap*fscal/fabs(fscal);
529 rinvsq = rinv31*rinv31;
532 fscal = (vcoul)*rinvsq;
534 if(force_cap>0 && (fabs(fscal)> force_cap)){
535 fscal=force_cap*fscal/fabs(fscal);
547 rinvsq = rinv41*rinv41;
550 fscal = (vcoul)*rinvsq;
552 if(force_cap>0 && (fabs(fscal)> force_cap)){
553 fscal=force_cap*fscal/fabs(fscal);
561 faction[j3+0] = fjx1 - tx;
562 faction[j3+1] = fjy1 - ty;
563 faction[j3+2] = fjz1 - tz;
566 faction[ii3+0] = faction[ii3+0] + fix1;
567 faction[ii3+1] = faction[ii3+1] + fiy1;
568 faction[ii3+2] = faction[ii3+2] + fiz1;
569 faction[ii3+3] = faction[ii3+3] + fix2;
570 faction[ii3+4] = faction[ii3+4] + fiy2;
571 faction[ii3+5] = faction[ii3+5] + fiz2;
572 faction[ii3+6] = faction[ii3+6] + fix3;
573 faction[ii3+7] = faction[ii3+7] + fiy3;
574 faction[ii3+8] = faction[ii3+8] + fiz3;
575 faction[ii3+9] = faction[ii3+9] + fix4;
576 faction[ii3+10] = faction[ii3+10] + fiy4;
577 faction[ii3+11] = faction[ii3+11] + fiz4;
578 fshift[is3] = fshift[is3]+fix1+fix2+fix3+fix4;
579 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
580 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
582 Vc[ggid] = Vc[ggid] + vctot;
583 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
584 ninner = ninner + nj1 - nj0;
587 nouter = nouter + nn1 - nn0;