2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/*
 * Thresholds compared against weight_product in the kernels below.
 *
 * ALMOST_ONE is wrapped in parentheses so that it expands as a single
 * operand: without them, an expression such as `x*ALMOST_ONE` would parse
 * as `(x*1)-(1e-30)`. The numeric value is unchanged.
 * NOTE(review): with IEEE-754 double arithmetic 1-(1e-30) rounds to
 * exactly 1.0, so `>= ALMOST_ONE` behaves like `>= 1.0` - confirm that
 * this is the intended threshold.
 */
#define ALMOST_ZERO 1e-30
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel122_adress.h"
32 * Gromacs nonbonded kernel nb_kernel122_adress_cg
33 * Coulomb interaction: Normal Coulomb
34 * VdW interaction: Buckingham
35 * water optimization: pairs of SPC/TIP3P interactions
36 * Calculate forces: yes
/*
 * nb_kernel122_adress_cg
 *
 * The generator banner for this routine lists: normal Coulomb, Buckingham
 * VdW, SPC/TIP3P water-water pairs, forces calculated.
 * The visible statements walk outer entries n in [nn0,nn1) and inner
 * entries k in [nj0,nj1), form nine site-site squared distances for a pair
 * of three-site groups, and accumulate results into faction[], fshift[],
 * Vc[] and Vvdw[].
 * NOTE(review): the parameter list and a number of intermediate statements
 * are not visible here; the comments below cover only what is shown.
 */
38 void nb_kernel122_adress_cg(
/* Integer counters/indices and real-valued scalar parameters. */
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Coordinates (i*, j*) and force accumulators (fi*, fj*) for sites 1..3. */
84 real ix1,iy1,iz1,fix1,fiy1,fiz1;
85 real ix2,iy2,iz2,fix2,fiy2,fiz2;
86 real ix3,iy3,iz3,fix3,fiy3,fiz3;
87 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
88 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
89 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* Displacement components, squared distance and 1/sqrt(squared distance)
 * for each of the nine (i-site, j-site) index pairs. */
90 real dx11,dy11,dz11,rsq11,rinv11;
91 real dx12,dy12,dz12,rsq12,rinv12;
92 real dx13,dy13,dz13,rsq13,rinv13;
93 real dx21,dy21,dz21,rsq21,rinv21;
94 real dx22,dy22,dz22,rsq22,rinv22;
95 real dx23,dy23,dz23,rsq23,rinv23;
96 real dx31,dy31,dz31,rsq31,rinv31;
97 real dx32,dy32,dz32,rsq32,rinv32;
98 real dx33,dy33,dz33,rsq33,rinv33;
99 real qO,qH,qqOO,qqOH,qqHH;
/* weight_product is weight_cg1*weight_cg2 (see the inner loop). */
101 real weight_cg1, weight_cg2, weight_product;
/* Scalars arrive by pointer and are copied into locals. */
106 nthreads = *p_nthreads;
110 tabscale = *p_tabscale;
/* Offset into vdwparam derived from ntype and type[ii]; cexp1 and cexp2
 * are read from entries +1 and +2 at that offset. */
117 tj = 3*(ntype+1)*type[ii];
119 cexp1 = vdwparam[tj+1];
120 cexp2 = vdwparam[tj+2];
/* When GMX_THREAD_SHM_FDECOMP is defined, the outer-range update is
 * bracketed by a mutex lock/unlock on mtx. */
127 #ifdef GMX_THREAD_SHM_FDECOMP
128 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* Upper bound of this chunk: nn0 plus (remaining entries)/(2*nthreads)+10. */
130 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
132 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 <= n < nn1. */
139 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector stored at shiftvec[is3..is3+2]. */
143 shY = shiftvec[is3+1];
144 shZ = shiftvec[is3+2];
/* Shifted coordinates of the three i-sites: nine consecutive pos[]
 * entries starting at ii3, each offset by (shX, shY, shZ). */
149 ix1 = shX + pos[ii3+0];
150 iy1 = shY + pos[ii3+1];
151 iz1 = shZ + pos[ii3+2];
152 ix2 = shX + pos[ii3+3];
153 iy2 = shY + pos[ii3+4];
154 iz2 = shZ + pos[ii3+5];
155 ix3 = shX + pos[ii3+6];
156 iy3 = shY + pos[ii3+7];
157 iz3 = shZ + pos[ii3+8];
/* Inner loop over entries nj0 <= k < nj1. */
171 for(k=nj0; (k<nj1); k++)
/* wf[jnr] times weight_cg1 gives weight_product, which is tested against
 * ALMOST_ZERO and ALMOST_ONE to choose how this pair is handled. */
174 weight_cg2 = wf[jnr];
175 weight_product = weight_cg1*weight_cg2;
176 if (weight_product < ALMOST_ZERO) {
179 else if (weight_product >= ALMOST_ONE)
181 /* force is zero, skip this molecule */
/* hybscal is the complement of the weight product. */
186 hybscal = 1.0 - weight_product;
/* Squared distances for all nine site pairs. */
201 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
205 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
209 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
213 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
217 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
221 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
225 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
229 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
233 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances: 1/sqrt of each squared distance. */
234 rinv11 = 1.0/sqrt(rsq11);
235 rinv12 = 1.0/sqrt(rsq12);
236 rinv13 = 1.0/sqrt(rsq13);
237 rinv21 = 1.0/sqrt(rsq21);
238 rinv22 = 1.0/sqrt(rsq22);
239 rinv23 = 1.0/sqrt(rsq23);
240 rinv31 = 1.0/sqrt(rsq31);
241 rinv32 = 1.0/sqrt(rsq32);
242 rinv33 = 1.0/sqrt(rsq33);
/* Pair 1-1: the only pair whose fscal includes the exponential and
 * Vvdw6 terms in addition to vcoul. */
244 rinvsq = rinv11*rinv11;
247 rinvsix = rinvsq*rinvsq*rinvsq;
/* rsq11*rinv11 equals the distance itself, so br = cexp2 * r11. */
249 br = cexp2*rsq11*rinv11;
250 Vvdwexp = cexp1*exp(-br);
251 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
252 fscal = (vcoul+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
/* Seed the j-site 1 accumulators from faction[] minus this pair's
 * (tx,ty,tz). */
260 fjx1 = faction[j3+0] - tx;
261 fjy1 = faction[j3+1] - ty;
262 fjz1 = faction[j3+2] - tz;
/* Pair 1-2: fscal is built from vcoul only (same for all pairs below). */
264 rinvsq = rinv12*rinv12;
267 fscal = (vcoul)*rinvsq;
275 fjx2 = faction[j3+3] - tx;
276 fjy2 = faction[j3+4] - ty;
277 fjz2 = faction[j3+5] - tz;
/* Pair 1-3. */
279 rinvsq = rinv13*rinv13;
282 fscal = (vcoul)*rinvsq;
290 fjx3 = faction[j3+6] - tx;
291 fjy3 = faction[j3+7] - ty;
292 fjz3 = faction[j3+8] - tz;
/* Pair 2-1. */
294 rinvsq = rinv21*rinv21;
297 fscal = (vcoul)*rinvsq;
/* Pair 2-2. */
309 rinvsq = rinv22*rinv22;
312 fscal = (vcoul)*rinvsq;
/* Pair 2-3. */
324 rinvsq = rinv23*rinv23;
327 fscal = (vcoul)*rinvsq;
/* Pair 3-1: after this pair the j-site 1 total is written back. */
339 rinvsq = rinv31*rinv31;
342 fscal = (vcoul)*rinvsq;
350 faction[j3+0] = fjx1 - tx;
351 faction[j3+1] = fjy1 - ty;
352 faction[j3+2] = fjz1 - tz;
/* Pair 3-2: j-site 2 total is written back. */
354 rinvsq = rinv32*rinv32;
357 fscal = (vcoul)*rinvsq;
365 faction[j3+3] = fjx2 - tx;
366 faction[j3+4] = fjy2 - ty;
367 faction[j3+5] = fjz2 - tz;
/* Pair 3-3: j-site 3 total is written back. */
369 rinvsq = rinv33*rinv33;
372 fscal = (vcoul)*rinvsq;
380 faction[j3+6] = fjx3 - tx;
381 faction[j3+7] = fjy3 - ty;
382 faction[j3+8] = fjz3 - tz;
/* Add the accumulated i-site forces to faction[] at ii3..ii3+8. */
385 faction[ii3+0] = faction[ii3+0] + fix1;
386 faction[ii3+1] = faction[ii3+1] + fiy1;
387 faction[ii3+2] = faction[ii3+2] + fiz1;
388 faction[ii3+3] = faction[ii3+3] + fix2;
389 faction[ii3+4] = faction[ii3+4] + fiy2;
390 faction[ii3+5] = faction[ii3+5] + fiz2;
391 faction[ii3+6] = faction[ii3+6] + fix3;
392 faction[ii3+7] = faction[ii3+7] + fiy3;
393 faction[ii3+8] = faction[ii3+8] + fiz3;
/* Per-component sum over the three i-sites goes into fshift[] at is3. */
394 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
395 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
396 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Accumulate the vctot and Vvdwtot totals into slot ggid. */
398 Vc[ggid] = Vc[ggid] + vctot;
399 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Running counts of inner and outer entries covered. */
400 ninner = ninner + nj1 - nj0;
403 nouter = nouter + nn1 - nn0;
416 * Gromacs nonbonded kernel nb_kernel122_adress_ex
417 * Coulomb interaction: Normal Coulomb
418 * VdW interaction: Buckingham
419 * water optimization: pairs of SPC/TIP3P interactions
420 * Calculate forces: yes
/*
 * nb_kernel122_adress_ex
 *
 * The generator banner for this routine lists: normal Coulomb, Buckingham
 * VdW, SPC/TIP3P water-water pairs, forces calculated.
 * The visible statements follow the same outer/inner loop layout and
 * nine-pair distance evaluation as nb_kernel122_adress_cg, with two
 * visible differences: hybscal is set to weight_product itself, and each
 * fscal is limited in magnitude to force_cap when force_cap is positive.
 * NOTE(review): the parameter list and a number of intermediate statements
 * are not visible here; the comments below cover only what is shown.
 */
422 void nb_kernel122_adress_ex(
/* Integer counters/indices and real-valued scalar parameters. */
456 int nri,ntype,nthreads;
457 real facel,krf,crf,tabscale,gbtabscale;
458 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
459 int nn0,nn1,nouter,ninner;
/* Coordinates (i*, j*) and force accumulators (fi*, fj*) for sites 1..3. */
468 real ix1,iy1,iz1,fix1,fiy1,fiz1;
469 real ix2,iy2,iz2,fix2,fiy2,fiz2;
470 real ix3,iy3,iz3,fix3,fiy3,fiz3;
471 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
472 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
473 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* Displacement components, squared distance and 1/sqrt(squared distance)
 * for each of the nine (i-site, j-site) index pairs. */
474 real dx11,dy11,dz11,rsq11,rinv11;
475 real dx12,dy12,dz12,rsq12,rinv12;
476 real dx13,dy13,dz13,rsq13,rinv13;
477 real dx21,dy21,dz21,rsq21,rinv21;
478 real dx22,dy22,dz22,rsq22,rinv22;
479 real dx23,dy23,dz23,rsq23,rinv23;
480 real dx31,dy31,dz31,rsq31,rinv31;
481 real dx32,dy32,dz32,rsq32,rinv32;
482 real dx33,dy33,dz33,rsq33,rinv33;
483 real qO,qH,qqOO,qqOH,qqHH;
/* weight_product is weight_cg1*weight_cg2 (see the inner loop). */
485 real weight_cg1, weight_cg2, weight_product;
/* Scalars arrive by pointer and are copied into locals. */
490 nthreads = *p_nthreads;
494 tabscale = *p_tabscale;
/* Offset into vdwparam derived from ntype and type[ii]; cexp1 and cexp2
 * are read from entries +1 and +2 at that offset. */
501 tj = 3*(ntype+1)*type[ii];
503 cexp1 = vdwparam[tj+1];
504 cexp2 = vdwparam[tj+2];
/* When GMX_THREAD_SHM_FDECOMP is defined, the outer-range update is
 * bracketed by a mutex lock/unlock on mtx. */
511 #ifdef GMX_THREAD_SHM_FDECOMP
512 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* Upper bound of this chunk: nn0 plus (remaining entries)/(2*nthreads)+10. */
514 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
516 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 <= n < nn1. */
523 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector stored at shiftvec[is3..is3+2]. */
527 shY = shiftvec[is3+1];
528 shZ = shiftvec[is3+2];
/* Shifted coordinates of the three i-sites: nine consecutive pos[]
 * entries starting at ii3, each offset by (shX, shY, shZ). */
533 ix1 = shX + pos[ii3+0];
534 iy1 = shY + pos[ii3+1];
535 iz1 = shZ + pos[ii3+2];
536 ix2 = shX + pos[ii3+3];
537 iy2 = shY + pos[ii3+4];
538 iz2 = shZ + pos[ii3+5];
539 ix3 = shX + pos[ii3+6];
540 iy3 = shY + pos[ii3+7];
541 iz3 = shZ + pos[ii3+8];
/* Inner loop over entries nj0 <= k < nj1. */
555 for(k=nj0; (k<nj1); k++)
/* wf[jnr] times weight_cg1 gives weight_product, which is tested against
 * ALMOST_ZERO and ALMOST_ONE to choose how this pair is handled. */
558 weight_cg2 = wf[jnr];
559 weight_product = weight_cg1*weight_cg2;
560 if (weight_product < ALMOST_ZERO) {
561 /* force is zero, skip this molecule */
564 else if (weight_product >= ALMOST_ONE)
/* hybscal is the weight product itself (the cg kernel uses its complement). */
570 hybscal = weight_product;
/* Squared distances for all nine site pairs. */
585 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
589 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
593 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
597 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
601 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
605 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
609 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
613 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
617 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances: 1/sqrt of each squared distance. */
618 rinv11 = 1.0/sqrt(rsq11);
619 rinv12 = 1.0/sqrt(rsq12);
620 rinv13 = 1.0/sqrt(rsq13);
621 rinv21 = 1.0/sqrt(rsq21);
622 rinv22 = 1.0/sqrt(rsq22);
623 rinv23 = 1.0/sqrt(rsq23);
624 rinv31 = 1.0/sqrt(rsq31);
625 rinv32 = 1.0/sqrt(rsq32);
626 rinv33 = 1.0/sqrt(rsq33);
/* Pair 1-1: the only pair whose fscal includes the exponential and
 * Vvdw6 terms in addition to vcoul. */
628 rinvsq = rinv11*rinv11;
631 rinvsix = rinvsq*rinvsq*rinvsq;
/* rsq11*rinv11 equals the distance itself, so br = cexp2 * r11. */
633 br = cexp2*rsq11*rinv11;
634 Vvdwexp = cexp1*exp(-br);
635 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6;
636 fscal = (vcoul+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
/* Clamp: when force_cap is positive and |fscal| exceeds it, fscal is
 * replaced by force_cap carrying the sign of fscal. The same clamp
 * follows every fscal computation below. */
638 if(force_cap>0 && (fabs(fscal)> force_cap)){
639 fscal=force_cap*fscal/fabs(fscal);
/* Seed the j-site 1 accumulators from faction[] minus this pair's
 * (tx,ty,tz). */
647 fjx1 = faction[j3+0] - tx;
648 fjy1 = faction[j3+1] - ty;
649 fjz1 = faction[j3+2] - tz;
/* Pair 1-2: fscal is built from vcoul only (same for all pairs below). */
651 rinvsq = rinv12*rinv12;
654 fscal = (vcoul)*rinvsq;
656 if(force_cap>0 && (fabs(fscal)> force_cap)){
657 fscal=force_cap*fscal/fabs(fscal);
665 fjx2 = faction[j3+3] - tx;
666 fjy2 = faction[j3+4] - ty;
667 fjz2 = faction[j3+5] - tz;
/* Pair 1-3. */
669 rinvsq = rinv13*rinv13;
672 fscal = (vcoul)*rinvsq;
674 if(force_cap>0 && (fabs(fscal)> force_cap)){
675 fscal=force_cap*fscal/fabs(fscal);
683 fjx3 = faction[j3+6] - tx;
684 fjy3 = faction[j3+7] - ty;
685 fjz3 = faction[j3+8] - tz;
/* Pair 2-1. */
687 rinvsq = rinv21*rinv21;
690 fscal = (vcoul)*rinvsq;
692 if(force_cap>0 && (fabs(fscal)> force_cap)){
693 fscal=force_cap*fscal/fabs(fscal);
/* Pair 2-2. */
705 rinvsq = rinv22*rinv22;
708 fscal = (vcoul)*rinvsq;
710 if(force_cap>0 && (fabs(fscal)> force_cap)){
711 fscal=force_cap*fscal/fabs(fscal);
/* Pair 2-3. */
723 rinvsq = rinv23*rinv23;
726 fscal = (vcoul)*rinvsq;
728 if(force_cap>0 && (fabs(fscal)> force_cap)){
729 fscal=force_cap*fscal/fabs(fscal);
/* Pair 3-1: after this pair the j-site 1 total is written back. */
741 rinvsq = rinv31*rinv31;
744 fscal = (vcoul)*rinvsq;
746 if(force_cap>0 && (fabs(fscal)> force_cap)){
747 fscal=force_cap*fscal/fabs(fscal);
755 faction[j3+0] = fjx1 - tx;
756 faction[j3+1] = fjy1 - ty;
757 faction[j3+2] = fjz1 - tz;
/* Pair 3-2: j-site 2 total is written back. */
759 rinvsq = rinv32*rinv32;
762 fscal = (vcoul)*rinvsq;
764 if(force_cap>0 && (fabs(fscal)> force_cap)){
765 fscal=force_cap*fscal/fabs(fscal);
773 faction[j3+3] = fjx2 - tx;
774 faction[j3+4] = fjy2 - ty;
775 faction[j3+5] = fjz2 - tz;
/* Pair 3-3: j-site 3 total is written back. */
777 rinvsq = rinv33*rinv33;
780 fscal = (vcoul)*rinvsq;
782 if(force_cap>0 && (fabs(fscal)> force_cap)){
783 fscal=force_cap*fscal/fabs(fscal);
791 faction[j3+6] = fjx3 - tx;
792 faction[j3+7] = fjy3 - ty;
793 faction[j3+8] = fjz3 - tz;
/* Add the accumulated i-site forces to faction[] at ii3..ii3+8. */
796 faction[ii3+0] = faction[ii3+0] + fix1;
797 faction[ii3+1] = faction[ii3+1] + fiy1;
798 faction[ii3+2] = faction[ii3+2] + fiz1;
799 faction[ii3+3] = faction[ii3+3] + fix2;
800 faction[ii3+4] = faction[ii3+4] + fiy2;
801 faction[ii3+5] = faction[ii3+5] + fiz2;
802 faction[ii3+6] = faction[ii3+6] + fix3;
803 faction[ii3+7] = faction[ii3+7] + fiy3;
804 faction[ii3+8] = faction[ii3+8] + fiz3;
/* Per-component sum over the three i-sites goes into fshift[] at is3. */
805 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
806 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
807 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Accumulate the vctot and Vvdwtot totals into slot ggid. */
809 Vc[ggid] = Vc[ggid] + vctot;
810 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Running counts of inner and outer entries covered. */
811 ninner = ninner + nj1 - nj0;
814 nouter = nouter + nn1 - nn0;