2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
/* Needed only in this build mode: the kernels call tMPI_Thread_mutex_lock/unlock under the same #ifdef. */
21 #include<thread_mpi.h>
/*
 * Thresholds that the kernels in this file compare against weight_product.
 *
 * NOTE(review): ALMOST_ONE expands to the unparenthesized expression
 * 1-(1e-30). That is harmless in the ">=" comparisons visible in this file,
 * but it would give the wrong result inside a product or a subtraction.
 * NOTE(review): in IEEE-754 double arithmetic 1-(1e-30) rounds to exactly
 * 1.0, so "weight_product >= ALMOST_ONE" is effectively
 * "weight_product >= 1.0" -- confirm this is intended.
 */
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel212_adress.h"
32 * Gromacs nonbonded kernel nb_kernel212_adress_cg
33 * Coulomb interaction: Reaction field
34 * VdW interaction: Lennard-Jones
35 * water optimization: pairs of SPC/TIP3P interactions
36 * Calculate forces: yes
/*
 * nb_kernel212_adress_cg
 *
 * Nonbonded kernel for pairs of three-site molecules. For every i entry of
 * the outer loop and every j entry of the inner loop, the visible code forms
 * the nine site-site squared distances and inverse distances, evaluates a
 * Coulomb term of the form qq*(rinv + krsq - crf) for each pair, adds a 6-12
 * Lennard-Jones term for the 1-1 pair only, and accumulates the results into
 * faction[] (forces), fshift[] (shift forces), Vc[] and Vvdw[] (energies).
 *
 * Each molecule pair carries weight_product = weight_cg1 * weight_cg2; this
 * variant sets hybscal = 1.0 - weight_product.
 *
 * NOTE(review): the parameter list and a number of statements are not
 * visible in this excerpt. p_nthreads, p_tabscale, mtx, pos, shiftvec,
 * faction, fshift, wf, type, vdwparam, Vc and Vvdw are presumably
 * parameters, and how hybscal is applied cannot be seen here -- confirm
 * against nb_kernel212_adress.h and the full source.
 */
38 void nb_kernel212_adress_cg(
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Shifted coordinates and force accumulators of the three i sites. */
85 real ix1,iy1,iz1,fix1,fiy1,fiz1;
86 real ix2,iy2,iz2,fix2,fiy2,fiz2;
87 real ix3,iy3,iz3,fix3,fiy3,fiz3;
/* Coordinates and force accumulators of the three j sites. */
88 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
89 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
90 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* Per-pair displacement, squared distance and inverse distance (first index i site, second index j site). */
91 real dx11,dy11,dz11,rsq11,rinv11;
92 real dx12,dy12,dz12,rsq12,rinv12;
93 real dx13,dy13,dz13,rsq13,rinv13;
94 real dx21,dy21,dz21,rsq21,rinv21;
95 real dx22,dy22,dz22,rsq22,rinv22;
96 real dx23,dy23,dz23,rsq23,rinv23;
97 real dx31,dy31,dz31,rsq31,rinv31;
98 real dx32,dy32,dz32,rsq32,rinv32;
99 real dx33,dy33,dz33,rsq33,rinv33;
100 real qO,qH,qqOO,qqOH,qqHH;
102 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are copied into locals. */
107 nthreads = *p_nthreads;
111 tabscale = *p_tabscale;
/* Offset into vdwparam derived from type[ii]; c12 is read from slot tj+1. */
/* NOTE(review): slot tj presumably holds c6 -- that line is not visible here. */
118 tj = 2*(ntype+1)*type[ii];
120 c12 = vdwparam[tj+1];
/*
 * Threaded shared-memory build: the upper bound nn1 of the outer-loop range
 * is computed while holding the mutex mtx. The range length is
 * (nri-nn0)/(2*nthreads)+10 entries.
 */
127 #ifdef GMX_THREAD_SHM_FDECOMP
128 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
130 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
132 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 .. nn1-1. */
139 for(n=nn0; (n<nn1); n++)
/* y and z components of the shift vector (the x component line is not visible here). */
143 shY = shiftvec[is3+1];
144 shZ = shiftvec[is3+2];
/* Shifted coordinates of the three i sites: nine consecutive values starting at pos[ii3]. */
149 ix1 = shX + pos[ii3+0];
150 iy1 = shY + pos[ii3+1];
151 iz1 = shZ + pos[ii3+2];
152 ix2 = shX + pos[ii3+3];
153 iy2 = shY + pos[ii3+4];
154 iz2 = shZ + pos[ii3+5];
155 ix3 = shX + pos[ii3+6];
156 iy3 = shY + pos[ii3+7];
157 iz3 = shZ + pos[ii3+8];
/* Inner loop over j entries nj0 .. nj1-1. */
171 for(k=nj0; (k<nj1); k++)
/* Weight of the j molecule, looked up by jnr, multiplied with the i-molecule weight. */
174 weight_cg2 = wf[jnr];
175 weight_product = weight_cg1*weight_cg2;
/* NOTE(review): the body of the first branch is not visible in this excerpt. */
176 if (weight_product < ALMOST_ZERO) {
179 else if (weight_product >= ALMOST_ONE)
181 /* force is zero, skip this molecule */
/* NOTE(review): presumably the remaining (intermediate-weight) branch; the scale is the complement of the weight product. */
186 hybscal = 1.0 - weight_product;
/* Squared distances of the nine site pairs (the dx/dy/dz assignments are not visible here). */
201 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
205 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
209 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
213 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
217 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
221 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
225 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
229 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
233 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances via the library sqrt. */
234 rinv11 = 1.0/sqrt(rsq11);
235 rinv12 = 1.0/sqrt(rsq12);
236 rinv13 = 1.0/sqrt(rsq13);
237 rinv21 = 1.0/sqrt(rsq21);
238 rinv22 = 1.0/sqrt(rsq22);
239 rinv23 = 1.0/sqrt(rsq23);
240 rinv31 = 1.0/sqrt(rsq31);
241 rinv32 = 1.0/sqrt(rsq32);
242 rinv33 = 1.0/sqrt(rsq33);
/* Pair 1-1: Coulomb term plus Lennard-Jones; the only pair that contributes to Vvdwtot. */
244 rinvsq = rinv11*rinv11;
246 vcoul = qq*(rinv11+krsq-crf);
248 rinvsix = rinvsq*rinvsq*rinvsq;
250 Vvdw12 = c12*rinvsix*rinvsix;
251 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
252 fscal = (qq*(rinv11-2.0*krsq)+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
/* Start the j-site-1 accumulator from the stored force minus this pair's (tx,ty,tz). */
260 fjx1 = faction[j3+0] - tx;
261 fjy1 = faction[j3+1] - ty;
262 fjz1 = faction[j3+2] - tz;
/* Pair 1-2: Coulomb term only. */
264 rinvsq = rinv12*rinv12;
266 vcoul = qq*(rinv12+krsq-crf);
268 fscal = (qq*(rinv12-2.0*krsq))*rinvsq;
/* Start the j-site-2 accumulator. */
276 fjx2 = faction[j3+3] - tx;
277 fjy2 = faction[j3+4] - ty;
278 fjz2 = faction[j3+5] - tz;
/* Pair 1-3: Coulomb term only. */
280 rinvsq = rinv13*rinv13;
282 vcoul = qq*(rinv13+krsq-crf);
284 fscal = (qq*(rinv13-2.0*krsq))*rinvsq;
/* Start the j-site-3 accumulator. */
292 fjx3 = faction[j3+6] - tx;
293 fjy3 = faction[j3+7] - ty;
294 fjz3 = faction[j3+8] - tz;
/* Pair 2-1: Coulomb term only. */
296 rinvsq = rinv21*rinv21;
298 vcoul = qq*(rinv21+krsq-crf);
300 fscal = (qq*(rinv21-2.0*krsq))*rinvsq;
/* Pair 2-2: Coulomb term only. */
312 rinvsq = rinv22*rinv22;
314 vcoul = qq*(rinv22+krsq-crf);
316 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
/* Pair 2-3: Coulomb term only. */
328 rinvsq = rinv23*rinv23;
330 vcoul = qq*(rinv23+krsq-crf);
332 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
/* Pair 3-1: Coulomb term only. */
344 rinvsq = rinv31*rinv31;
346 vcoul = qq*(rinv31+krsq-crf);
348 fscal = (qq*(rinv31-2.0*krsq))*rinvsq;
/* Last pair involving j site 1: write its accumulated force back to faction. */
356 faction[j3+0] = fjx1 - tx;
357 faction[j3+1] = fjy1 - ty;
358 faction[j3+2] = fjz1 - tz;
/* Pair 3-2: Coulomb term only. */
360 rinvsq = rinv32*rinv32;
362 vcoul = qq*(rinv32+krsq-crf);
364 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
/* Last pair involving j site 2: write its accumulated force back. */
372 faction[j3+3] = fjx2 - tx;
373 faction[j3+4] = fjy2 - ty;
374 faction[j3+5] = fjz2 - tz;
/* Pair 3-3: Coulomb term only. */
376 rinvsq = rinv33*rinv33;
378 vcoul = qq*(rinv33+krsq-crf);
380 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
/* Last pair involving j site 3: write its accumulated force back. */
388 faction[j3+6] = fjx3 - tx;
389 faction[j3+7] = fjy3 - ty;
390 faction[j3+8] = fjz3 - tz;
/* Add the accumulated forces of the three i sites to faction. */
393 faction[ii3+0] = faction[ii3+0] + fix1;
394 faction[ii3+1] = faction[ii3+1] + fiy1;
395 faction[ii3+2] = faction[ii3+2] + fiz1;
396 faction[ii3+3] = faction[ii3+3] + fix2;
397 faction[ii3+4] = faction[ii3+4] + fiy2;
398 faction[ii3+5] = faction[ii3+5] + fiz2;
399 faction[ii3+6] = faction[ii3+6] + fix3;
400 faction[ii3+7] = faction[ii3+7] + fiy3;
401 faction[ii3+8] = faction[ii3+8] + fiz3;
/* The summed force on the three i sites is added to the shift-force entry at is3. */
402 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
403 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
404 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Add this entry's Coulomb and Van der Waals energy totals to slot ggid. */
406 Vc[ggid] = Vc[ggid] + vctot;
407 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Running count of inner-loop iterations. */
408 ninner = ninner + nj1 - nj0;
/* Running count of outer-loop iterations. */
411 nouter = nouter + nn1 - nn0;
424 * Gromacs nonbonded kernel nb_kernel212_adress_ex
425 * Coulomb interaction: Reaction field
426 * VdW interaction: Lennard-Jones
427 * water optimization: pairs of SPC/TIP3P interactions
428 * Calculate forces: yes
/*
 * nb_kernel212_adress_ex
 *
 * Nonbonded kernel for pairs of three-site molecules. For every i entry of
 * the outer loop and every j entry of the inner loop, the visible code forms
 * the nine site-site squared distances and inverse distances, evaluates a
 * Coulomb term of the form qq*(rinv + krsq - crf) for each pair, adds a 6-12
 * Lennard-Jones term for the 1-1 pair only, and accumulates the results into
 * faction[] (forces), fshift[] (shift forces), Vc[] and Vvdw[] (energies).
 *
 * Each molecule pair carries weight_product = weight_cg1 * weight_cg2; this
 * variant sets hybscal = weight_product. After every pair's fscal is
 * computed, its magnitude is limited to force_cap when force_cap is
 * positive.
 *
 * NOTE(review): the parameter list and a number of statements are not
 * visible in this excerpt. p_nthreads, p_tabscale, mtx, pos, shiftvec,
 * faction, fshift, wf, type, vdwparam, Vc, Vvdw and force_cap are
 * presumably parameters, and how hybscal is applied cannot be seen here --
 * confirm against nb_kernel212_adress.h and the full source.
 */
430 void nb_kernel212_adress_ex(
464 int nri,ntype,nthreads;
465 real facel,krf,crf,tabscale,gbtabscale;
466 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
467 int nn0,nn1,nouter,ninner;
/* Shifted coordinates and force accumulators of the three i sites. */
477 real ix1,iy1,iz1,fix1,fiy1,fiz1;
478 real ix2,iy2,iz2,fix2,fiy2,fiz2;
479 real ix3,iy3,iz3,fix3,fiy3,fiz3;
/* Coordinates and force accumulators of the three j sites. */
480 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
481 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
482 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* Per-pair displacement, squared distance and inverse distance (first index i site, second index j site). */
483 real dx11,dy11,dz11,rsq11,rinv11;
484 real dx12,dy12,dz12,rsq12,rinv12;
485 real dx13,dy13,dz13,rsq13,rinv13;
486 real dx21,dy21,dz21,rsq21,rinv21;
487 real dx22,dy22,dz22,rsq22,rinv22;
488 real dx23,dy23,dz23,rsq23,rinv23;
489 real dx31,dy31,dz31,rsq31,rinv31;
490 real dx32,dy32,dz32,rsq32,rinv32;
491 real dx33,dy33,dz33,rsq33,rinv33;
492 real qO,qH,qqOO,qqOH,qqHH;
494 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are copied into locals. */
499 nthreads = *p_nthreads;
503 tabscale = *p_tabscale;
/* Offset into vdwparam derived from type[ii]; c12 is read from slot tj+1. */
/* NOTE(review): slot tj presumably holds c6 -- that line is not visible here. */
510 tj = 2*(ntype+1)*type[ii];
512 c12 = vdwparam[tj+1];
/*
 * Threaded shared-memory build: the upper bound nn1 of the outer-loop range
 * is computed while holding the mutex mtx. The range length is
 * (nri-nn0)/(2*nthreads)+10 entries.
 */
519 #ifdef GMX_THREAD_SHM_FDECOMP
520 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
522 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
524 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 .. nn1-1. */
531 for(n=nn0; (n<nn1); n++)
/* y and z components of the shift vector (the x component line is not visible here). */
535 shY = shiftvec[is3+1];
536 shZ = shiftvec[is3+2];
/* Shifted coordinates of the three i sites: nine consecutive values starting at pos[ii3]. */
541 ix1 = shX + pos[ii3+0];
542 iy1 = shY + pos[ii3+1];
543 iz1 = shZ + pos[ii3+2];
544 ix2 = shX + pos[ii3+3];
545 iy2 = shY + pos[ii3+4];
546 iz2 = shZ + pos[ii3+5];
547 ix3 = shX + pos[ii3+6];
548 iy3 = shY + pos[ii3+7];
549 iz3 = shZ + pos[ii3+8];
/* Inner loop over j entries nj0 .. nj1-1. */
563 for(k=nj0; (k<nj1); k++)
/* Weight of the j molecule, looked up by jnr, multiplied with the i-molecule weight. */
566 weight_cg2 = wf[jnr];
567 weight_product = weight_cg1*weight_cg2;
568 if (weight_product < ALMOST_ZERO) {
569 /* force is zero, skip this molecule */
/* NOTE(review): the body of the following branch is not visible in this excerpt. */
572 else if (weight_product >= ALMOST_ONE)
/* NOTE(review): presumably the remaining (intermediate-weight) branch; the scale is the weight product itself. */
578 hybscal = weight_product;
/* Squared distances of the nine site pairs (the dx/dy/dz assignments are not visible here). */
593 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
597 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
601 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
605 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
609 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
613 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
617 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
621 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
625 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances via the library sqrt. */
626 rinv11 = 1.0/sqrt(rsq11);
627 rinv12 = 1.0/sqrt(rsq12);
628 rinv13 = 1.0/sqrt(rsq13);
629 rinv21 = 1.0/sqrt(rsq21);
630 rinv22 = 1.0/sqrt(rsq22);
631 rinv23 = 1.0/sqrt(rsq23);
632 rinv31 = 1.0/sqrt(rsq31);
633 rinv32 = 1.0/sqrt(rsq32);
634 rinv33 = 1.0/sqrt(rsq33);
/* Pair 1-1: Coulomb term plus Lennard-Jones; the only pair that contributes to Vvdwtot. */
636 rinvsq = rinv11*rinv11;
638 vcoul = qq*(rinv11+krsq-crf);
640 rinvsix = rinvsq*rinvsq*rinvsq;
642 Vvdw12 = c12*rinvsix*rinvsix;
643 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
644 fscal = (qq*(rinv11-2.0*krsq)+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
/* Force cap: when force_cap is positive and |fscal| exceeds it, replace fscal by force_cap with the sign of fscal. */
646 if(force_cap>0 && (fabs(fscal)> force_cap)){
647 fscal=force_cap*fscal/fabs(fscal);
/* Start the j-site-1 accumulator from the stored force minus this pair's (tx,ty,tz). */
655 fjx1 = faction[j3+0] - tx;
656 fjy1 = faction[j3+1] - ty;
657 fjz1 = faction[j3+2] - tz;
/* Pair 1-2: Coulomb term only, then the same force cap. */
659 rinvsq = rinv12*rinv12;
661 vcoul = qq*(rinv12+krsq-crf);
663 fscal = (qq*(rinv12-2.0*krsq))*rinvsq;
665 if(force_cap>0 && (fabs(fscal)> force_cap)){
666 fscal=force_cap*fscal/fabs(fscal);
/* Start the j-site-2 accumulator. */
674 fjx2 = faction[j3+3] - tx;
675 fjy2 = faction[j3+4] - ty;
676 fjz2 = faction[j3+5] - tz;
/* Pair 1-3: Coulomb term only, then the same force cap. */
678 rinvsq = rinv13*rinv13;
680 vcoul = qq*(rinv13+krsq-crf);
682 fscal = (qq*(rinv13-2.0*krsq))*rinvsq;
684 if(force_cap>0 && (fabs(fscal)> force_cap)){
685 fscal=force_cap*fscal/fabs(fscal);
/* Start the j-site-3 accumulator. */
693 fjx3 = faction[j3+6] - tx;
694 fjy3 = faction[j3+7] - ty;
695 fjz3 = faction[j3+8] - tz;
/* Pair 2-1: Coulomb term only, then the same force cap. */
697 rinvsq = rinv21*rinv21;
699 vcoul = qq*(rinv21+krsq-crf);
701 fscal = (qq*(rinv21-2.0*krsq))*rinvsq;
703 if(force_cap>0 && (fabs(fscal)> force_cap)){
704 fscal=force_cap*fscal/fabs(fscal);
/* Pair 2-2: Coulomb term only, then the same force cap. */
716 rinvsq = rinv22*rinv22;
718 vcoul = qq*(rinv22+krsq-crf);
720 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
722 if(force_cap>0 && (fabs(fscal)> force_cap)){
723 fscal=force_cap*fscal/fabs(fscal);
/* Pair 2-3: Coulomb term only, then the same force cap. */
735 rinvsq = rinv23*rinv23;
737 vcoul = qq*(rinv23+krsq-crf);
739 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
741 if(force_cap>0 && (fabs(fscal)> force_cap)){
742 fscal=force_cap*fscal/fabs(fscal);
/* Pair 3-1: Coulomb term only, then the same force cap. */
754 rinvsq = rinv31*rinv31;
756 vcoul = qq*(rinv31+krsq-crf);
758 fscal = (qq*(rinv31-2.0*krsq))*rinvsq;
760 if(force_cap>0 && (fabs(fscal)> force_cap)){
761 fscal=force_cap*fscal/fabs(fscal);
/* Last pair involving j site 1: write its accumulated force back to faction. */
769 faction[j3+0] = fjx1 - tx;
770 faction[j3+1] = fjy1 - ty;
771 faction[j3+2] = fjz1 - tz;
/* Pair 3-2: Coulomb term only, then the same force cap. */
773 rinvsq = rinv32*rinv32;
775 vcoul = qq*(rinv32+krsq-crf);
777 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
779 if(force_cap>0 && (fabs(fscal)> force_cap)){
780 fscal=force_cap*fscal/fabs(fscal);
/* Last pair involving j site 2: write its accumulated force back. */
788 faction[j3+3] = fjx2 - tx;
789 faction[j3+4] = fjy2 - ty;
790 faction[j3+5] = fjz2 - tz;
/* Pair 3-3: Coulomb term only, then the same force cap. */
792 rinvsq = rinv33*rinv33;
794 vcoul = qq*(rinv33+krsq-crf);
796 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
798 if(force_cap>0 && (fabs(fscal)> force_cap)){
799 fscal=force_cap*fscal/fabs(fscal);
/* Last pair involving j site 3: write its accumulated force back. */
807 faction[j3+6] = fjx3 - tx;
808 faction[j3+7] = fjy3 - ty;
809 faction[j3+8] = fjz3 - tz;
/* Add the accumulated forces of the three i sites to faction. */
812 faction[ii3+0] = faction[ii3+0] + fix1;
813 faction[ii3+1] = faction[ii3+1] + fiy1;
814 faction[ii3+2] = faction[ii3+2] + fiz1;
815 faction[ii3+3] = faction[ii3+3] + fix2;
816 faction[ii3+4] = faction[ii3+4] + fiy2;
817 faction[ii3+5] = faction[ii3+5] + fiz2;
818 faction[ii3+6] = faction[ii3+6] + fix3;
819 faction[ii3+7] = faction[ii3+7] + fiy3;
820 faction[ii3+8] = faction[ii3+8] + fiz3;
/* The summed force on the three i sites is added to the shift-force entry at is3. */
821 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
822 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
823 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Add this entry's Coulomb and Van der Waals energy totals to slot ggid. */
825 Vc[ggid] = Vc[ggid] + vctot;
826 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
/* Running count of inner-loop iterations. */
827 ninner = ninner + nj1 - nj0;
/* Running count of outer-loop iterations. */
830 nouter = nouter + nn1 - nn0;