2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/* Lower threshold compared against weight_product in the kernels of this file. */
23 #define ALMOST_ZERO 1e-30
/*
 * Upper threshold compared against weight_product in the kernels of this file.
 * NOTE(review): the expansion is not wrapped in parentheses. The comparisons
 * `weight_product >= ALMOST_ONE` parse as intended, but a use such as
 * `x * ALMOST_ONE` would not; confirm no such use exists before relying on it.
 * NOTE(review): in IEEE-754 double arithmetic 1-(1e-30) appears to round to
 * exactly 1.0, so the threshold is effectively 1 - confirm this is intended.
 */
24 #define ALMOST_ONE 1-(1e-30)
27 #include "nb_kernel204_adress.h"
32 * Gromacs nonbonded kernel nb_kernel204_adress_cg
33 * Coulomb interaction: Reaction field
34 * VdW interaction: Not calculated
35 * water optimization: pairs of TIP4P interactions
36 * Calculate forces: yes
/*
 * nb_kernel204_adress_cg
 *
 * Evaluates the nine site-site pairs formed by sites 2, 3 and 4 of an
 * i molecule and sites 2, 3 and 4 of a j molecule. For every pair the
 * potential is qq*(rinv + krsq - crf) and the scalar force is
 * (qq*(rinv - 2.0*krsq))*rinvsq. Forces are accumulated in faction, the
 * summed i-site forces in fshift, and the potential total in Vc.
 *
 * Each j molecule is weighted with weight_product = weight_cg1*weight_cg2,
 * where weight_cg2 is read from wf[jnr]. In this variant the scaling factor
 * is hybscal = 1.0 - weight_product.
 *
 * NOTE(review): how hybscal, krsq, qq and tx/ty/tz are applied should be
 * confirmed against the complete generated source.
 */
38 void nb_kernel204_adress_cg(
/* Counts, scalar parameters, loop and list indices. */
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* Coordinates and force accumulators of i sites 2, 3 and 4. */
81 real ix2,iy2,iz2,fix2,fiy2,fiz2;
82 real ix3,iy3,iz3,fix3,fiy3,fiz3;
83 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* Coordinates and force accumulators of j sites 2, 3 and 4. */
84 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
85 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
86 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
/* Per-pair distance vector, squared distance and inverse distance (i site, j site). */
87 real dx22,dy22,dz22,rsq22,rinv22;
88 real dx23,dy23,dz23,rsq23,rinv23;
89 real dx24,dy24,dz24,rsq24,rinv24;
90 real dx32,dy32,dz32,rsq32,rinv32;
91 real dx33,dy33,dz33,rsq33,rinv33;
92 real dx34,dy34,dz34,rsq34,rinv34;
93 real dx42,dy42,dz42,rsq42,rinv42;
94 real dx43,dy43,dz43,rsq43,rinv43;
95 real dx44,dy44,dz44,rsq44,rinv44;
/* Site charges and charge products. */
96 real qH,qM,qqMM,qqMH,qqHH;
/* Weights of the i and j molecules and their product. */
97 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar inputs that arrive by pointer into locals. */
102 nthreads = *p_nthreads;
106 tabscale = *p_tabscale;
/*
 * With GMX_THREAD_SHM_FDECOMP the upper bound of the outer-loop range is
 * computed while holding the mutex mtx: the range spans
 * (nri-nn0)/(2*nthreads)+10 outer iterations starting at nn0.
 */
119 #ifdef GMX_THREAD_SHM_FDECOMP
120 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
122 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
124 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over the range [nn0, nn1). */
131 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector stored at is3. */
135 shY = shiftvec[is3+1];
136 shZ = shiftvec[is3+2];
/* Shifted coordinates of i sites 2, 3 and 4, read from pos at offsets 3..11 from ii3. */
141 ix2 = shX + pos[ii3+3];
142 iy2 = shY + pos[ii3+4];
143 iz2 = shZ + pos[ii3+5];
144 ix3 = shX + pos[ii3+6];
145 iy3 = shY + pos[ii3+7];
146 iz3 = shZ + pos[ii3+8];
147 ix4 = shX + pos[ii3+9];
148 iy4 = shY + pos[ii3+10];
149 iz4 = shZ + pos[ii3+11];
/* Inner loop over list entries nj0 .. nj1-1. */
162 for(k=nj0; (k<nj1); k++)
/* Weight of the j molecule and its product with the i molecule's weight. */
165 weight_cg2 = wf[jnr];
166 weight_product = weight_cg1*weight_cg2;
/*
 * weight_product below ALMOST_ZERO and at or above ALMOST_ONE are handled
 * as special cases; here the >= ALMOST_ONE branch carries the skip comment.
 */
167 if (weight_product < ALMOST_ZERO) {
170 else if (weight_product >= ALMOST_ONE)
172 /* force is zero, skip this molecule */
/* Scaling factor for this variant: the complement of the weight product. */
177 hybscal = 1.0 - weight_product;
/* Squared distances for the nine site pairs. */
192 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
196 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
200 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
204 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
208 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
212 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
216 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
220 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
224 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
/* Inverse distances, computed with 1.0/sqrt(). */
225 rinv22 = 1.0/sqrt(rsq22);
226 rinv23 = 1.0/sqrt(rsq23);
227 rinv24 = 1.0/sqrt(rsq24);
228 rinv32 = 1.0/sqrt(rsq32);
229 rinv33 = 1.0/sqrt(rsq33);
230 rinv34 = 1.0/sqrt(rsq34);
231 rinv42 = 1.0/sqrt(rsq42);
232 rinv43 = 1.0/sqrt(rsq43);
233 rinv44 = 1.0/sqrt(rsq44);
/* Pair 2-2: potential and scalar force. */
235 rinvsq = rinv22*rinv22;
237 vcoul = qq*(rinv22+krsq-crf);
239 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
/* Start the j site 2 accumulator from faction, minus tx/ty/tz. */
247 fjx2 = faction[j3+3] - tx;
248 fjy2 = faction[j3+4] - ty;
249 fjz2 = faction[j3+5] - tz;
/* Pair 2-3: potential and scalar force. */
251 rinvsq = rinv23*rinv23;
253 vcoul = qq*(rinv23+krsq-crf);
255 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
/* Start the j site 3 accumulator from faction, minus tx/ty/tz. */
263 fjx3 = faction[j3+6] - tx;
264 fjy3 = faction[j3+7] - ty;
265 fjz3 = faction[j3+8] - tz;
/* Pair 2-4: potential and scalar force. */
267 rinvsq = rinv24*rinv24;
269 vcoul = qq*(rinv24+krsq-crf);
271 fscal = (qq*(rinv24-2.0*krsq))*rinvsq;
/* Start the j site 4 accumulator from faction, minus tx/ty/tz. */
279 fjx4 = faction[j3+9] - tx;
280 fjy4 = faction[j3+10] - ty;
281 fjz4 = faction[j3+11] - tz;
/* Pair 3-2: potential and scalar force. */
283 rinvsq = rinv32*rinv32;
285 vcoul = qq*(rinv32+krsq-crf);
287 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
/* Pair 3-3: potential and scalar force. */
299 rinvsq = rinv33*rinv33;
301 vcoul = qq*(rinv33+krsq-crf);
303 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
/* Pair 3-4: potential and scalar force. */
315 rinvsq = rinv34*rinv34;
317 vcoul = qq*(rinv34+krsq-crf);
319 fscal = (qq*(rinv34-2.0*krsq))*rinvsq;
/* Pair 4-2: potential and scalar force. */
331 rinvsq = rinv42*rinv42;
333 vcoul = qq*(rinv42+krsq-crf);
335 fscal = (qq*(rinv42-2.0*krsq))*rinvsq;
/* Last pair involving j site 2: write its accumulated force back to faction. */
343 faction[j3+3] = fjx2 - tx;
344 faction[j3+4] = fjy2 - ty;
345 faction[j3+5] = fjz2 - tz;
/* Pair 4-3: potential and scalar force. */
347 rinvsq = rinv43*rinv43;
349 vcoul = qq*(rinv43+krsq-crf);
351 fscal = (qq*(rinv43-2.0*krsq))*rinvsq;
/* Last pair involving j site 3: write its accumulated force back to faction. */
359 faction[j3+6] = fjx3 - tx;
360 faction[j3+7] = fjy3 - ty;
361 faction[j3+8] = fjz3 - tz;
/* Pair 4-4: potential and scalar force. */
363 rinvsq = rinv44*rinv44;
365 vcoul = qq*(rinv44+krsq-crf);
367 fscal = (qq*(rinv44-2.0*krsq))*rinvsq;
/* Last pair involving j site 4: write its accumulated force back to faction. */
375 faction[j3+9] = fjx4 - tx;
376 faction[j3+10] = fjy4 - ty;
377 faction[j3+11] = fjz4 - tz;
/* Add the accumulated forces of i sites 2, 3 and 4 to faction. */
380 faction[ii3+3] = faction[ii3+3] + fix2;
381 faction[ii3+4] = faction[ii3+4] + fiy2;
382 faction[ii3+5] = faction[ii3+5] + fiz2;
383 faction[ii3+6] = faction[ii3+6] + fix3;
384 faction[ii3+7] = faction[ii3+7] + fiy3;
385 faction[ii3+8] = faction[ii3+8] + fiz3;
386 faction[ii3+9] = faction[ii3+9] + fix4;
387 faction[ii3+10] = faction[ii3+10] + fiy4;
388 faction[ii3+11] = faction[ii3+11] + fiz4;
/* The sum of the three i-site forces is added to the shift force at is3. */
389 fshift[is3] = fshift[is3]+fix2+fix3+fix4;
390 fshift[is3+1] = fshift[is3+1]+fiy2+fiy3+fiy4;
391 fshift[is3+2] = fshift[is3+2]+fiz2+fiz3+fiz4;
/* Add the potential total vctot to entry ggid of Vc. */
393 Vc[ggid] = Vc[ggid] + vctot;
/* Running count of inner-loop iterations. */
394 ninner = ninner + nj1 - nj0;
/* Running count of outer-loop iterations. */
397 nouter = nouter + nn1 - nn0;
410 * Gromacs nonbonded kernel nb_kernel204_adress_ex
411 * Coulomb interaction: Reaction field
412 * VdW interaction: Not calculated
413 * water optimization: pairs of TIP4P interactions
414 * Calculate forces: yes
/*
 * nb_kernel204_adress_ex
 *
 * Evaluates the nine site-site pairs formed by sites 2, 3 and 4 of an
 * i molecule and sites 2, 3 and 4 of a j molecule. For every pair the
 * potential is qq*(rinv + krsq - crf) and the scalar force is
 * (qq*(rinv - 2.0*krsq))*rinvsq. Forces are accumulated in faction, the
 * summed i-site forces in fshift, and the potential total in Vc.
 *
 * Each j molecule is weighted with weight_product = weight_cg1*weight_cg2,
 * where weight_cg2 is read from wf[jnr]. In this variant the scaling factor
 * is hybscal = weight_product.
 *
 * NOTE(review): how hybscal, krsq, qq and tx/ty/tz are applied should be
 * confirmed against the complete generated source.
 */
416 void nb_kernel204_adress_ex(
/* Counts, scalar parameters, loop and list indices. */
450 int nri,ntype,nthreads;
451 real facel,krf,crf,tabscale,gbtabscale;
452 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
453 int nn0,nn1,nouter,ninner;
/* Coordinates and force accumulators of i sites 2, 3 and 4. */
459 real ix2,iy2,iz2,fix2,fiy2,fiz2;
460 real ix3,iy3,iz3,fix3,fiy3,fiz3;
461 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* Coordinates and force accumulators of j sites 2, 3 and 4. */
462 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
463 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
464 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
/* Per-pair distance vector, squared distance and inverse distance (i site, j site). */
465 real dx22,dy22,dz22,rsq22,rinv22;
466 real dx23,dy23,dz23,rsq23,rinv23;
467 real dx24,dy24,dz24,rsq24,rinv24;
468 real dx32,dy32,dz32,rsq32,rinv32;
469 real dx33,dy33,dz33,rsq33,rinv33;
470 real dx34,dy34,dz34,rsq34,rinv34;
471 real dx42,dy42,dz42,rsq42,rinv42;
472 real dx43,dy43,dz43,rsq43,rinv43;
473 real dx44,dy44,dz44,rsq44,rinv44;
/* Site charges and charge products. */
474 real qH,qM,qqMM,qqMH,qqHH;
/* Weights of the i and j molecules and their product. */
475 real weight_cg1, weight_cg2, weight_product;
/* Copy scalar inputs that arrive by pointer into locals. */
480 nthreads = *p_nthreads;
484 tabscale = *p_tabscale;
/*
 * With GMX_THREAD_SHM_FDECOMP the upper bound of the outer-loop range is
 * computed while holding the mutex mtx: the range spans
 * (nri-nn0)/(2*nthreads)+10 outer iterations starting at nn0.
 */
497 #ifdef GMX_THREAD_SHM_FDECOMP
498 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
500 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
502 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over the range [nn0, nn1). */
509 for(n=nn0; (n<nn1); n++)
/* Y and Z components of the shift vector stored at is3. */
513 shY = shiftvec[is3+1];
514 shZ = shiftvec[is3+2];
/* Shifted coordinates of i sites 2, 3 and 4, read from pos at offsets 3..11 from ii3. */
519 ix2 = shX + pos[ii3+3];
520 iy2 = shY + pos[ii3+4];
521 iz2 = shZ + pos[ii3+5];
522 ix3 = shX + pos[ii3+6];
523 iy3 = shY + pos[ii3+7];
524 iz3 = shZ + pos[ii3+8];
525 ix4 = shX + pos[ii3+9];
526 iy4 = shY + pos[ii3+10];
527 iz4 = shZ + pos[ii3+11];
/* Inner loop over list entries nj0 .. nj1-1. */
540 for(k=nj0; (k<nj1); k++)
/* Weight of the j molecule and its product with the i molecule's weight. */
543 weight_cg2 = wf[jnr];
544 weight_product = weight_cg1*weight_cg2;
/*
 * weight_product below ALMOST_ZERO and at or above ALMOST_ONE are handled
 * as special cases; here the < ALMOST_ZERO branch carries the skip comment.
 */
545 if (weight_product < ALMOST_ZERO) {
546 /* force is zero, skip this molecule */
549 else if (weight_product >= ALMOST_ONE)
/* Scaling factor for this variant: the weight product itself. */
555 hybscal = weight_product;
/* Squared distances for the nine site pairs. */
570 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
574 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
578 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
582 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
586 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
590 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
594 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
598 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
602 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
/* Inverse distances, computed with 1.0/sqrt(). */
603 rinv22 = 1.0/sqrt(rsq22);
604 rinv23 = 1.0/sqrt(rsq23);
605 rinv24 = 1.0/sqrt(rsq24);
606 rinv32 = 1.0/sqrt(rsq32);
607 rinv33 = 1.0/sqrt(rsq33);
608 rinv34 = 1.0/sqrt(rsq34);
609 rinv42 = 1.0/sqrt(rsq42);
610 rinv43 = 1.0/sqrt(rsq43);
611 rinv44 = 1.0/sqrt(rsq44);
/* Pair 2-2: potential and scalar force. */
613 rinvsq = rinv22*rinv22;
615 vcoul = qq*(rinv22+krsq-crf);
617 fscal = (qq*(rinv22-2.0*krsq))*rinvsq;
/* Start the j site 2 accumulator from faction, minus tx/ty/tz. */
625 fjx2 = faction[j3+3] - tx;
626 fjy2 = faction[j3+4] - ty;
627 fjz2 = faction[j3+5] - tz;
/* Pair 2-3: potential and scalar force. */
629 rinvsq = rinv23*rinv23;
631 vcoul = qq*(rinv23+krsq-crf);
633 fscal = (qq*(rinv23-2.0*krsq))*rinvsq;
/* Start the j site 3 accumulator from faction, minus tx/ty/tz. */
641 fjx3 = faction[j3+6] - tx;
642 fjy3 = faction[j3+7] - ty;
643 fjz3 = faction[j3+8] - tz;
/* Pair 2-4: potential and scalar force. */
645 rinvsq = rinv24*rinv24;
647 vcoul = qq*(rinv24+krsq-crf);
649 fscal = (qq*(rinv24-2.0*krsq))*rinvsq;
/* Start the j site 4 accumulator from faction, minus tx/ty/tz. */
657 fjx4 = faction[j3+9] - tx;
658 fjy4 = faction[j3+10] - ty;
659 fjz4 = faction[j3+11] - tz;
/* Pair 3-2: potential and scalar force. */
661 rinvsq = rinv32*rinv32;
663 vcoul = qq*(rinv32+krsq-crf);
665 fscal = (qq*(rinv32-2.0*krsq))*rinvsq;
/* Pair 3-3: potential and scalar force. */
677 rinvsq = rinv33*rinv33;
679 vcoul = qq*(rinv33+krsq-crf);
681 fscal = (qq*(rinv33-2.0*krsq))*rinvsq;
/* Pair 3-4: potential and scalar force. */
693 rinvsq = rinv34*rinv34;
695 vcoul = qq*(rinv34+krsq-crf);
697 fscal = (qq*(rinv34-2.0*krsq))*rinvsq;
/* Pair 4-2: potential and scalar force. */
709 rinvsq = rinv42*rinv42;
711 vcoul = qq*(rinv42+krsq-crf);
713 fscal = (qq*(rinv42-2.0*krsq))*rinvsq;
/* Last pair involving j site 2: write its accumulated force back to faction. */
721 faction[j3+3] = fjx2 - tx;
722 faction[j3+4] = fjy2 - ty;
723 faction[j3+5] = fjz2 - tz;
/* Pair 4-3: potential and scalar force. */
725 rinvsq = rinv43*rinv43;
727 vcoul = qq*(rinv43+krsq-crf);
729 fscal = (qq*(rinv43-2.0*krsq))*rinvsq;
/* Last pair involving j site 3: write its accumulated force back to faction. */
737 faction[j3+6] = fjx3 - tx;
738 faction[j3+7] = fjy3 - ty;
739 faction[j3+8] = fjz3 - tz;
/* Pair 4-4: potential and scalar force. */
741 rinvsq = rinv44*rinv44;
743 vcoul = qq*(rinv44+krsq-crf);
745 fscal = (qq*(rinv44-2.0*krsq))*rinvsq;
/* Last pair involving j site 4: write its accumulated force back to faction. */
753 faction[j3+9] = fjx4 - tx;
754 faction[j3+10] = fjy4 - ty;
755 faction[j3+11] = fjz4 - tz;
/* Add the accumulated forces of i sites 2, 3 and 4 to faction. */
758 faction[ii3+3] = faction[ii3+3] + fix2;
759 faction[ii3+4] = faction[ii3+4] + fiy2;
760 faction[ii3+5] = faction[ii3+5] + fiz2;
761 faction[ii3+6] = faction[ii3+6] + fix3;
762 faction[ii3+7] = faction[ii3+7] + fiy3;
763 faction[ii3+8] = faction[ii3+8] + fiz3;
764 faction[ii3+9] = faction[ii3+9] + fix4;
765 faction[ii3+10] = faction[ii3+10] + fiy4;
766 faction[ii3+11] = faction[ii3+11] + fiz4;
/* The sum of the three i-site forces is added to the shift force at is3. */
767 fshift[is3] = fshift[is3]+fix2+fix3+fix4;
768 fshift[is3+1] = fshift[is3+1]+fiy2+fiy3+fiy4;
769 fshift[is3+2] = fshift[is3+2]+fiz2+fiz3+fiz4;
/* Add the potential total vctot to entry ggid of Vc. */
771 Vc[ggid] = Vc[ggid] + vctot;
/* Running count of inner-loop iterations. */
772 ninner = ninner + nj1 - nj0;
/* Running count of outer-loop iterations. */
775 nouter = nouter + nn1 - nn0;