2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/*
 * Thresholds that the kernels in this file compare weight_product against.
 *
 * ALMOST_ONE is wrapped in parentheses so it always expands as a single
 * operand; without them an expression such as ALMOST_ONE*x would parse as
 * 1-((1e-30)*x). The existing ">=" comparisons are unaffected by the change.
 *
 * NOTE(review): 1e-30 is far below double-precision epsilon, so on IEEE-754
 * this constant evaluates to exactly 1.0 - confirm that is intended.
 * NOTE(review): the file header says this file is generated by mknb, so the
 * same change belongs in the generator.
 */
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel202_adress.h"
32 * Gromacs nonbonded kernel nb_kernel202_adress_cg
33 * Coulomb interaction: Reaction field
34 * VdW interaction: Not calculated
35 * water optimization: pairs of SPC/TIP3P interactions
36 * Calculate forces: yes
/*
 * nb_kernel202_adress_cg
 *
 * Water-water kernel: three sites on the i molecule against three sites on
 * each j molecule, giving nine pair terms per inner iteration. Every pair
 * evaluates vcoul = qq*(rinv+krsq-crf) and a scalar force built from rinv
 * and krsq. A weight product (weight_cg1*weight_cg2) is tested against
 * ALMOST_ZERO and ALMOST_ONE and is used to set hybscal.
 *
 * NOTE(review): only part of this function is visible here (the parameter
 * list, braces and many statements are absent), so the comments below
 * describe the visible lines only.
 */
38 void nb_kernel202_adress_cg(
/* Scalar settings and loop bookkeeping. */
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Coordinates and force accumulators for the three i sites. */
81 real ix1,iy1,iz1,fix1,fiy1,fiz1;
82 real ix2,iy2,iz2,fix2,fiy2,fiz2;
83 real ix3,iy3,iz3,fix3,fiy3,fiz3;
/* Coordinates and force accumulators for the three j sites. */
84 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
85 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
86 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* Per-pair displacement, squared distance and inverse distance (3x3 pairs). */
87 real dx11,dy11,dz11,rsq11,rinv11;
88 real dx12,dy12,dz12,rsq12,rinv12;
89 real dx13,dy13,dz13,rsq13,rinv13;
90 real dx21,dy21,dz21,rsq21,rinv21;
91 real dx22,dy22,dz22,rsq22,rinv22;
92 real dx23,dy23,dz23,rsq23,rinv23;
93 real dx31,dy31,dz31,rsq31,rinv31;
94 real dx32,dy32,dz32,rsq32,rinv32;
95 real dx33,dy33,dz33,rsq33,rinv33;
96 real qO,qH,qqOO,qqOH,qqHH;
/* Weights whose product decides how a j entry is treated. */
97 real weight_cg1, weight_cg2, weight_product;
/* Scalar arguments arrive by pointer and are copied into locals. */
102 nthreads = *p_nthreads;
106 tabscale = *p_tabscale;
119 #ifdef GMX_THREAD_SHM_FDECOMP
/* The visible lines lock mtx, compute the upper bound nn1, then unlock mtx. */
120 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* Upper bound: nn0 plus the remaining (nri-nn0) divided by 2*nthreads, plus 10. */
122 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
124 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 <= n < nn1. */
131 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector at offset is3. */
135 shY = shiftvec[is3+1];
136 shZ = shiftvec[is3+2];
/* Shifted coordinates of the three i sites: nine consecutive values from pos[ii3]. */
141 ix1 = shX + pos[ii3+0];
142 iy1 = shY + pos[ii3+1];
143 iz1 = shZ + pos[ii3+2];
144 ix2 = shX + pos[ii3+3];
145 iy2 = shY + pos[ii3+4];
146 iz2 = shZ + pos[ii3+5];
147 ix3 = shX + pos[ii3+6];
148 iy3 = shY + pos[ii3+7];
149 iz3 = shZ + pos[ii3+8];
/* Inner loop over entries nj0 <= k < nj1. */
162 for(k=nj0; (k<nj1); k++)
/* Weight of the j entry, read from wf, and its product with weight_cg1. */
165 weight_cg2 = wf[jnr];
166 weight_product = weight_cg1*weight_cg2;
/*
 * Two-threshold test on the product. In this kernel the skip comment sits
 * under the ">= ALMOST_ONE" branch; the body of the "< ALMOST_ZERO" branch
 * is not visible here.
 */
167 if (weight_product < ALMOST_ZERO) {
170 else if (weight_product >= ALMOST_ONE)
172 /* force is zero, skip this molecule */
/* presumably the remaining (in-between) case - the enclosing else is not visible; verify */
177 hybscal = 1.0 - weight_product;
/* Squared distances for the nine site pairs. */
192 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
196 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
200 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
204 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
208 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
212 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
216 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
220 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
224 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances via 1.0/sqrt() (the file header records "Software invsqrt: no"). */
225 rinv11 = 1.0/sqrt(rsq11);
226 rinv12 = 1.0/sqrt(rsq12);
227 rinv13 = 1.0/sqrt(rsq13);
228 rinv21 = 1.0/sqrt(rsq21);
229 rinv22 = 1.0/sqrt(rsq22);
230 rinv23 = 1.0/sqrt(rsq23);
231 rinv31 = 1.0/sqrt(rsq31);
232 rinv32 = 1.0/sqrt(rsq32);
233 rinv33 = 1.0/sqrt(rsq33);
/* Pair 11: potential and scalar force (qq and krsq are set on lines not visible here). */
235 rinvsq = rinv11*rinv11;
237 vcoul = qq*(rinv11+krsq-crf);
239 fscal = (qq*(rinv11-2.0*krsq))*rinvsq;
/* j site 1: start from the stored force and subtract tx/ty/tz. */
247 fjx1 = faction[j3+0] - tx;
248 fjy1 = faction[j3+1] - ty;
249 fjz1 = faction[j3+2] - tz;
/* Pair 12. */
251 rinvsq = rinv12*rinv12;
253 vcoul = qq*(rinv12+krsq-crf);
255 fscal = (qq*(rinv12-2.0*krsq))*rinvsq;
/* j site 2: start from the stored force and subtract tx/ty/tz. */
263 fjx2 = faction[j3+3] - tx;
264 fjy2 = faction[j3+4] - ty;
265 fjz2 = faction[j3+5] - tz;
/* Pair 13. */
267 rinvsq = rinv13*rinv13;
269 vcoul = qq*(rinv13+krsq-crf);
271 fscal = (qq*(rinv13-2.0*krsq))*rinvsq;
/* j site 3: start from the stored force and subtract tx/ty/tz. */
279 fjx3 = faction[j3+6] - tx;
280 fjy3 = faction[j3+7] - ty;
281 fjz3 = faction[j3+8] - tz;
/* Pair 21. */
283 rinvsq = rinv21*rinv21;
285 vcoul = qq*(rinv21+krsq-crf);
287 fscal = (qq*(rinv21-2.0*krsq))*rinvsq;
/* Pair 22. */
299 rinvsq = rinv22*rinv22;
301 vcoul = qq*(rinv22+krsq-crf);
303 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
/* Pair 23. */
315 rinvsq = rinv23*rinv23;
317 vcoul = qq*(rinv23+krsq-crf);
319 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
/* Pair 31. */
331 rinvsq = rinv31*rinv31;
333 vcoul = qq*(rinv31+krsq-crf);
335 fscal = (qq*(rinv31-2.0*krsq))*rinvsq;
/* j site 1: subtract the last contribution and write the force back. */
343 faction[j3+0] = fjx1 - tx;
344 faction[j3+1] = fjy1 - ty;
345 faction[j3+2] = fjz1 - tz;
/* Pair 32. */
347 rinvsq = rinv32*rinv32;
349 vcoul = qq*(rinv32+krsq-crf);
351 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
/* j site 2: subtract the last contribution and write the force back. */
359 faction[j3+3] = fjx2 - tx;
360 faction[j3+4] = fjy2 - ty;
361 faction[j3+5] = fjz2 - tz;
/* Pair 33. */
363 rinvsq = rinv33*rinv33;
365 vcoul = qq*(rinv33+krsq-crf);
367 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
/* j site 3: subtract the last contribution and write the force back. */
375 faction[j3+6] = fjx3 - tx;
376 faction[j3+7] = fjy3 - ty;
377 faction[j3+8] = fjz3 - tz;
/* Add the accumulated i-site forces to the force array. */
380 faction[ii3+0] = faction[ii3+0] + fix1;
381 faction[ii3+1] = faction[ii3+1] + fiy1;
382 faction[ii3+2] = faction[ii3+2] + fiz1;
383 faction[ii3+3] = faction[ii3+3] + fix2;
384 faction[ii3+4] = faction[ii3+4] + fiy2;
385 faction[ii3+5] = faction[ii3+5] + fiz2;
386 faction[ii3+6] = faction[ii3+6] + fix3;
387 faction[ii3+7] = faction[ii3+7] + fiy3;
388 faction[ii3+8] = faction[ii3+8] + fiz3;
/* The shift-force entry receives the sum over the three i sites, per component. */
389 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
390 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
391 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Add vctot to the Vc slot selected by ggid. */
393 Vc[ggid] = Vc[ggid] + vctot;
/* Iteration counters: inner entries for this n, then outer entries for this chunk. */
394 ninner = ninner + nj1 - nj0;
397 nouter = nouter + nn1 - nn0;
410 * Gromacs nonbonded kernel nb_kernel202_adress_ex
411 * Coulomb interaction: Reaction field
412 * VdW interaction: Not calculated
413 * water optimization: pairs of SPC/TIP3P interactions
414 * Calculate forces: yes
/*
 * nb_kernel202_adress_ex
 *
 * Water-water kernel: three sites on the i molecule against three sites on
 * each j molecule, giving nine pair terms per inner iteration. Every pair
 * evaluates vcoul = qq*(rinv+krsq-crf) and a scalar force built from rinv
 * and krsq. Compared with the _cg kernel in this file, the visible
 * differences are that the skip comment sits under the "< ALMOST_ZERO"
 * branch and that hybscal is set to weight_product itself.
 *
 * NOTE(review): only part of this function is visible here (the parameter
 * list, braces and many statements are absent), so the comments below
 * describe the visible lines only.
 */
416 void nb_kernel202_adress_ex(
/* Scalar settings and loop bookkeeping. */
450 int nri,ntype,nthreads;
451 real facel,krf,crf,tabscale,gbtabscale;
452 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
453 int nn0,nn1,nouter,ninner;
/* Coordinates and force accumulators for the three i sites. */
459 real ix1,iy1,iz1,fix1,fiy1,fiz1;
460 real ix2,iy2,iz2,fix2,fiy2,fiz2;
461 real ix3,iy3,iz3,fix3,fiy3,fiz3;
/* Coordinates and force accumulators for the three j sites. */
462 real jx1,jy1,jz1,fjx1,fjy1,fjz1;
463 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
464 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
/* Per-pair displacement, squared distance and inverse distance (3x3 pairs). */
465 real dx11,dy11,dz11,rsq11,rinv11;
466 real dx12,dy12,dz12,rsq12,rinv12;
467 real dx13,dy13,dz13,rsq13,rinv13;
468 real dx21,dy21,dz21,rsq21,rinv21;
469 real dx22,dy22,dz22,rsq22,rinv22;
470 real dx23,dy23,dz23,rsq23,rinv23;
471 real dx31,dy31,dz31,rsq31,rinv31;
472 real dx32,dy32,dz32,rsq32,rinv32;
473 real dx33,dy33,dz33,rsq33,rinv33;
474 real qO,qH,qqOO,qqOH,qqHH;
/* Weights whose product decides how a j entry is treated. */
475 real weight_cg1, weight_cg2, weight_product;
/* Scalar arguments arrive by pointer and are copied into locals. */
480 nthreads = *p_nthreads;
484 tabscale = *p_tabscale;
497 #ifdef GMX_THREAD_SHM_FDECOMP
/* The visible lines lock mtx, compute the upper bound nn1, then unlock mtx. */
498 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
/* Upper bound: nn0 plus the remaining (nri-nn0) divided by 2*nthreads, plus 10. */
500 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
502 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 <= n < nn1. */
509 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector at offset is3. */
513 shY = shiftvec[is3+1];
514 shZ = shiftvec[is3+2];
/* Shifted coordinates of the three i sites: nine consecutive values from pos[ii3]. */
519 ix1 = shX + pos[ii3+0];
520 iy1 = shY + pos[ii3+1];
521 iz1 = shZ + pos[ii3+2];
522 ix2 = shX + pos[ii3+3];
523 iy2 = shY + pos[ii3+4];
524 iz2 = shZ + pos[ii3+5];
525 ix3 = shX + pos[ii3+6];
526 iy3 = shY + pos[ii3+7];
527 iz3 = shZ + pos[ii3+8];
/* Inner loop over entries nj0 <= k < nj1. */
540 for(k=nj0; (k<nj1); k++)
/* Weight of the j entry, read from wf, and its product with weight_cg1. */
543 weight_cg2 = wf[jnr];
544 weight_product = weight_cg1*weight_cg2;
/*
 * Two-threshold test on the product. In this kernel the skip comment sits
 * under the "< ALMOST_ZERO" branch; the body of the ">= ALMOST_ONE" branch
 * is not visible here.
 */
545 if (weight_product < ALMOST_ZERO) {
546 /* force is zero, skip this molecule */
549 else if (weight_product >= ALMOST_ONE)
/* presumably the remaining (in-between) case - the enclosing else is not visible; verify */
555 hybscal = weight_product;
/* Squared distances for the nine site pairs. */
570 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11;
574 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12;
578 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13;
582 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21;
586 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
590 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
594 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31;
598 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
602 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
/* Inverse distances via 1.0/sqrt() (the file header records "Software invsqrt: no"). */
603 rinv11 = 1.0/sqrt(rsq11);
604 rinv12 = 1.0/sqrt(rsq12);
605 rinv13 = 1.0/sqrt(rsq13);
606 rinv21 = 1.0/sqrt(rsq21);
607 rinv22 = 1.0/sqrt(rsq22);
608 rinv23 = 1.0/sqrt(rsq23);
609 rinv31 = 1.0/sqrt(rsq31);
610 rinv32 = 1.0/sqrt(rsq32);
611 rinv33 = 1.0/sqrt(rsq33);
/* Pair 11: potential and scalar force (qq and krsq are set on lines not visible here). */
613 rinvsq = rinv11*rinv11;
615 vcoul = qq*(rinv11+krsq-crf);
617 fscal = (qq*(rinv11-2.0*krsq))*rinvsq;
/* j site 1: start from the stored force and subtract tx/ty/tz. */
625 fjx1 = faction[j3+0] - tx;
626 fjy1 = faction[j3+1] - ty;
627 fjz1 = faction[j3+2] - tz;
/* Pair 12. */
629 rinvsq = rinv12*rinv12;
631 vcoul = qq*(rinv12+krsq-crf);
633 fscal = (qq*(rinv12-2.0*krsq))*rinvsq;
/* j site 2: start from the stored force and subtract tx/ty/tz. */
641 fjx2 = faction[j3+3] - tx;
642 fjy2 = faction[j3+4] - ty;
643 fjz2 = faction[j3+5] - tz;
/* Pair 13. */
645 rinvsq = rinv13*rinv13;
647 vcoul = qq*(rinv13+krsq-crf);
649 fscal = (qq*(rinv13-2.0*krsq))*rinvsq;
/* j site 3: start from the stored force and subtract tx/ty/tz. */
657 fjx3 = faction[j3+6] - tx;
658 fjy3 = faction[j3+7] - ty;
659 fjz3 = faction[j3+8] - tz;
/* Pair 21. */
661 rinvsq = rinv21*rinv21;
663 vcoul = qq*(rinv21+krsq-crf);
665 fscal = (qq*(rinv21-2.0*krsq))*rinvsq;
/* Pair 22. */
677 rinvsq = rinv22*rinv22;
679 vcoul = qq*(rinv22+krsq-crf);
681 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
/* Pair 23. */
693 rinvsq = rinv23*rinv23;
695 vcoul = qq*(rinv23+krsq-crf);
697 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
/* Pair 31. */
709 rinvsq = rinv31*rinv31;
711 vcoul = qq*(rinv31+krsq-crf);
713 fscal = (qq*(rinv31-2.0*krsq))*rinvsq;
/* j site 1: subtract the last contribution and write the force back. */
721 faction[j3+0] = fjx1 - tx;
722 faction[j3+1] = fjy1 - ty;
723 faction[j3+2] = fjz1 - tz;
/* Pair 32. */
725 rinvsq = rinv32*rinv32;
727 vcoul = qq*(rinv32+krsq-crf);
729 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
/* j site 2: subtract the last contribution and write the force back. */
737 faction[j3+3] = fjx2 - tx;
738 faction[j3+4] = fjy2 - ty;
739 faction[j3+5] = fjz2 - tz;
/* Pair 33. */
741 rinvsq = rinv33*rinv33;
743 vcoul = qq*(rinv33+krsq-crf);
745 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
/* j site 3: subtract the last contribution and write the force back. */
753 faction[j3+6] = fjx3 - tx;
754 faction[j3+7] = fjy3 - ty;
755 faction[j3+8] = fjz3 - tz;
/* Add the accumulated i-site forces to the force array. */
758 faction[ii3+0] = faction[ii3+0] + fix1;
759 faction[ii3+1] = faction[ii3+1] + fiy1;
760 faction[ii3+2] = faction[ii3+2] + fiz1;
761 faction[ii3+3] = faction[ii3+3] + fix2;
762 faction[ii3+4] = faction[ii3+4] + fiy2;
763 faction[ii3+5] = faction[ii3+5] + fiz2;
764 faction[ii3+6] = faction[ii3+6] + fix3;
765 faction[ii3+7] = faction[ii3+7] + fiy3;
766 faction[ii3+8] = faction[ii3+8] + fiz3;
/* The shift-force entry receives the sum over the three i sites, per component. */
767 fshift[is3] = fshift[is3]+fix1+fix2+fix3;
768 fshift[is3+1] = fshift[is3+1]+fiy1+fiy2+fiy3;
769 fshift[is3+2] = fshift[is3+2]+fiz1+fiz2+fiz3;
/* Add vctot to the Vc slot selected by ggid. */
771 Vc[ggid] = Vc[ggid] + vctot;
/* Iteration counters: inner entries for this n, then outer entries for this chunk. */
772 ninner = ninner + nj1 - nj0;
775 nouter = nouter + nn1 - nn0;