2 * Copyright (c) Erik Lindahl, David van der Spoel 2003
4 * This file is generated automatically at compile time
5 * by the program mknb in the Gromacs distribution.
7 * Options used when generating this file:
11 * Software invsqrt: no
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
/*
 * Thresholds compared against weight_product (weight_cg1*weight_cg2) in the
 * kernels of this file to detect the "weight is 0" and "weight is 1" cases.
 *
 * ALMOST_ONE is fully parenthesized so that it behaves as a single value in
 * any expression: without the outer parentheses, `ALMOST_ONE * x` would parse
 * as `1 - (1e-30 * x)`.
 * NOTE(review): on IEEE-754 doubles 1-(1e-30) rounds to exactly 1.0, so the
 * `>= ALMOST_ONE` test is effectively `>= 1.0` -- confirm this is intended.
 */
#define ALMOST_ZERO 1e-30
#define ALMOST_ONE (1-(1e-30))
27 #include "nb_kernel104_adress.h"
32 * Gromacs nonbonded kernel nb_kernel104_adress_cg
33 * Coulomb interaction: Normal Coulomb
34 * VdW interaction: Not calculated
35 * water optimization: pairs of TIP4P interactions
36 * Calculate forces: yes
/*
 * Coulomb-only kernel over pairs of TIP4P waters (per the banner comment
 * preceding this function). Visible behaviour: for each outer entry n in
 * [nn0,nn1) and each neighbour k in [nj0,nj1) it forms the nine site-site
 * squared distances between sites 2..4 of both molecules, derives a scalar
 * force factor from vcoul and 1/r^2, and accumulates into faction, fshift
 * and Vc. This _cg variant sets hybscal = 1.0 - weight_product.
 */
38 void nb_kernel104_adress_cg(
/* Scalar parameters, loop indices and iteration counters. */
72 int nri,ntype,nthreads;
73 real facel,krf,crf,tabscale,gbtabscale;
74 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75 int nn0,nn1,nouter,ninner;
/* i-molecule sites 2..4: shifted coordinates (i*) and force sums (fi*). */
80 real ix2,iy2,iz2,fix2,fiy2,fiz2;
81 real ix3,iy3,iz3,fix3,fiy3,fiz3;
82 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* j-molecule sites 2..4: coordinates (j*) and running forces (fj*). */
83 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
84 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
85 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
/*
 * Per site pair AB: separation components, squared distance and inverse
 * distance. The second digit matches the j-site whose force is updated
 * after each pair; the first digit is presumably the i-site.
 */
86 real dx22,dy22,dz22,rsq22,rinv22;
87 real dx23,dy23,dz23,rsq23,rinv23;
88 real dx24,dy24,dz24,rsq24,rinv24;
89 real dx32,dy32,dz32,rsq32,rinv32;
90 real dx33,dy33,dz33,rsq33,rinv33;
91 real dx34,dy34,dz34,rsq34,rinv34;
92 real dx42,dy42,dz42,rsq42,rinv42;
93 real dx43,dy43,dz43,rsq43,rinv43;
94 real dx44,dy44,dz44,rsq44,rinv44;
95 real qH,qM,qqMM,qqMH,qqHH;
/* Weights of the two molecules of a pair and their product. */
96 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are copied into locals. */
101 nthreads = *p_nthreads;
105 tabscale = *p_tabscale;
118 #ifdef GMX_THREAD_SHM_FDECOMP
/*
 * With the mutex held, choose the upper bound nn1 of this thread's next
 * chunk of outer iterations: the remaining count (nri-nn0) divided by
 * 2*nthreads, plus 10.
 * NOTE(review): any clamping of nn1 to nri is not visible here -- confirm.
 */
119 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
121 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
123 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 .. nn1-1. */
130 for(n=nn0; (n<nn1); n++)
/* Shift vector components for this entry. */
134 shY = shiftvec[is3+1];
135 shZ = shiftvec[is3+2];
/* i-molecule coordinates at offsets 3..11 from ii3, translated by the shift. */
140 ix2 = shX + pos[ii3+3];
141 iy2 = shY + pos[ii3+4];
142 iz2 = shZ + pos[ii3+5];
143 ix3 = shX + pos[ii3+6];
144 iy3 = shY + pos[ii3+7];
145 iz3 = shZ + pos[ii3+8];
146 ix4 = shX + pos[ii3+9];
147 iy4 = shY + pos[ii3+10];
148 iz4 = shZ + pos[ii3+11];
/* Inner loop over neighbour entries nj0 .. nj1-1. */
161 for(k=nj0; (k<nj1); k++)
/* Combine the weight of the j molecule (wf[jnr]) with that of the i molecule. */
164 weight_cg2 = wf[jnr];
165 weight_product = weight_cg1*weight_cg2;
/* Branch on the combined weight using the ALMOST_ZERO / ALMOST_ONE thresholds. */
166 if (weight_product < ALMOST_ZERO) {
169 else if (weight_product >= ALMOST_ONE)
171 /* force is zero, skip this molecule */
/* Presumably the remaining (intermediate) case: complement of the weight product. */
176 hybscal = 1.0 - weight_product;
/* Squared distances for the nine site pairs. */
191 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
195 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
199 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
203 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
207 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
211 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
215 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
219 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
223 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
/* Inverse distances via the library sqrt. */
224 rinv22 = 1.0/sqrt(rsq22);
225 rinv23 = 1.0/sqrt(rsq23);
226 rinv24 = 1.0/sqrt(rsq24);
227 rinv32 = 1.0/sqrt(rsq32);
228 rinv33 = 1.0/sqrt(rsq33);
229 rinv34 = 1.0/sqrt(rsq34);
230 rinv42 = 1.0/sqrt(rsq42);
231 rinv43 = 1.0/sqrt(rsq43);
232 rinv44 = 1.0/sqrt(rsq44);
/* Pair 22: scalar force factor is vcoul / r^2 (vcoul and tx/ty/tz are set in lines not shown). */
234 rinvsq = rinv22*rinv22;
237 fscal = (vcoul)*rinvsq;
/* First pair touching j-site 2: start its running force from the stored value. */
245 fjx2 = faction[j3+3] - tx;
246 fjy2 = faction[j3+4] - ty;
247 fjz2 = faction[j3+5] - tz;
/* Pair 23. */
249 rinvsq = rinv23*rinv23;
252 fscal = (vcoul)*rinvsq;
/* First pair touching j-site 3: start its running force from the stored value. */
260 fjx3 = faction[j3+6] - tx;
261 fjy3 = faction[j3+7] - ty;
262 fjz3 = faction[j3+8] - tz;
/* Pair 24. */
264 rinvsq = rinv24*rinv24;
267 fscal = (vcoul)*rinvsq;
/* First pair touching j-site 4: start its running force from the stored value. */
275 fjx4 = faction[j3+9] - tx;
276 fjy4 = faction[j3+10] - ty;
277 fjz4 = faction[j3+11] - tz;
/* Pair 32. */
279 rinvsq = rinv32*rinv32;
282 fscal = (vcoul)*rinvsq;
/* Pair 33. */
294 rinvsq = rinv33*rinv33;
297 fscal = (vcoul)*rinvsq;
/* Pair 34. */
309 rinvsq = rinv34*rinv34;
312 fscal = (vcoul)*rinvsq;
/* Pair 42. */
324 rinvsq = rinv42*rinv42;
327 fscal = (vcoul)*rinvsq;
/* Last pair touching j-site 2: write its force back to faction. */
335 faction[j3+3] = fjx2 - tx;
336 faction[j3+4] = fjy2 - ty;
337 faction[j3+5] = fjz2 - tz;
/* Pair 43. */
339 rinvsq = rinv43*rinv43;
342 fscal = (vcoul)*rinvsq;
/* Last pair touching j-site 3: write its force back to faction. */
350 faction[j3+6] = fjx3 - tx;
351 faction[j3+7] = fjy3 - ty;
352 faction[j3+8] = fjz3 - tz;
/* Pair 44. */
354 rinvsq = rinv44*rinv44;
357 fscal = (vcoul)*rinvsq;
/* Last pair touching j-site 4: write its force back to faction. */
365 faction[j3+9] = fjx4 - tx;
366 faction[j3+10] = fjy4 - ty;
367 faction[j3+11] = fjz4 - tz;
/* Add the summed i-site forces to the force array ... */
370 faction[ii3+3] = faction[ii3+3] + fix2;
371 faction[ii3+4] = faction[ii3+4] + fiy2;
372 faction[ii3+5] = faction[ii3+5] + fiz2;
373 faction[ii3+6] = faction[ii3+6] + fix3;
374 faction[ii3+7] = faction[ii3+7] + fiy3;
375 faction[ii3+8] = faction[ii3+8] + fiz3;
376 faction[ii3+9] = faction[ii3+9] + fix4;
377 faction[ii3+10] = faction[ii3+10] + fiy4;
378 faction[ii3+11] = faction[ii3+11] + fiz4;
/* ... and their per-component total over sites 2..4 to the shift-force array. */
379 fshift[is3] = fshift[is3]+fix2+fix3+fix4;
380 fshift[is3+1] = fshift[is3+1]+fiy2+fiy3+fiy4;
381 fshift[is3+2] = fshift[is3+2]+fiz2+fiz3+fiz4;
/* Add vctot to the Coulomb energy slot indexed by ggid. */
383 Vc[ggid] = Vc[ggid] + vctot;
/* Iteration counters: neighbours processed, then outer entries processed. */
384 ninner = ninner + nj1 - nj0;
387 nouter = nouter + nn1 - nn0;
400 * Gromacs nonbonded kernel nb_kernel104_adress_ex
401 * Coulomb interaction: Normal Coulomb
402 * VdW interaction: Not calculated
403 * water optimization: pairs of TIP4P interactions
404 * Calculate forces: yes
/*
 * Coulomb-only kernel over pairs of TIP4P waters (per the banner comment
 * preceding this function). Visible behaviour: for each outer entry n in
 * [nn0,nn1) and each neighbour k in [nj0,nj1) it forms the nine site-site
 * squared distances between sites 2..4 of both molecules, derives a scalar
 * force factor from vcoul and 1/r^2, and accumulates into faction, fshift
 * and Vc. This _ex variant sets hybscal = weight_product.
 */
406 void nb_kernel104_adress_ex(
/* Scalar parameters, loop indices and iteration counters. */
440 int nri,ntype,nthreads;
441 real facel,krf,crf,tabscale,gbtabscale;
442 int n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
443 int nn0,nn1,nouter,ninner;
/* i-molecule sites 2..4: shifted coordinates (i*) and force sums (fi*). */
448 real ix2,iy2,iz2,fix2,fiy2,fiz2;
449 real ix3,iy3,iz3,fix3,fiy3,fiz3;
450 real ix4,iy4,iz4,fix4,fiy4,fiz4;
/* j-molecule sites 2..4: coordinates (j*) and running forces (fj*). */
451 real jx2,jy2,jz2,fjx2,fjy2,fjz2;
452 real jx3,jy3,jz3,fjx3,fjy3,fjz3;
453 real jx4,jy4,jz4,fjx4,fjy4,fjz4;
/*
 * Per site pair AB: separation components, squared distance and inverse
 * distance. The second digit matches the j-site whose force is updated
 * after each pair; the first digit is presumably the i-site.
 */
454 real dx22,dy22,dz22,rsq22,rinv22;
455 real dx23,dy23,dz23,rsq23,rinv23;
456 real dx24,dy24,dz24,rsq24,rinv24;
457 real dx32,dy32,dz32,rsq32,rinv32;
458 real dx33,dy33,dz33,rsq33,rinv33;
459 real dx34,dy34,dz34,rsq34,rinv34;
460 real dx42,dy42,dz42,rsq42,rinv42;
461 real dx43,dy43,dz43,rsq43,rinv43;
462 real dx44,dy44,dz44,rsq44,rinv44;
463 real qH,qM,qqMM,qqMH,qqHH;
/* Weights of the two molecules of a pair and their product. */
464 real weight_cg1, weight_cg2, weight_product;
/* Scalar inputs arrive by pointer and are copied into locals. */
469 nthreads = *p_nthreads;
473 tabscale = *p_tabscale;
486 #ifdef GMX_THREAD_SHM_FDECOMP
/*
 * With the mutex held, choose the upper bound nn1 of this thread's next
 * chunk of outer iterations: the remaining count (nri-nn0) divided by
 * 2*nthreads, plus 10.
 * NOTE(review): any clamping of nn1 to nri is not visible here -- confirm.
 */
487 tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
489 nn1 = nn0+(nri-nn0)/(2*nthreads)+10;
491 tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
/* Outer loop over entries nn0 .. nn1-1. */
498 for(n=nn0; (n<nn1); n++)
/* Shift vector components for this entry. */
502 shY = shiftvec[is3+1];
503 shZ = shiftvec[is3+2];
/* i-molecule coordinates at offsets 3..11 from ii3, translated by the shift. */
508 ix2 = shX + pos[ii3+3];
509 iy2 = shY + pos[ii3+4];
510 iz2 = shZ + pos[ii3+5];
511 ix3 = shX + pos[ii3+6];
512 iy3 = shY + pos[ii3+7];
513 iz3 = shZ + pos[ii3+8];
514 ix4 = shX + pos[ii3+9];
515 iy4 = shY + pos[ii3+10];
516 iz4 = shZ + pos[ii3+11];
/* Inner loop over neighbour entries nj0 .. nj1-1. */
529 for(k=nj0; (k<nj1); k++)
/* Combine the weight of the j molecule (wf[jnr]) with that of the i molecule. */
532 weight_cg2 = wf[jnr];
533 weight_product = weight_cg1*weight_cg2;
/* Branch on the combined weight using the ALMOST_ZERO / ALMOST_ONE thresholds. */
534 if (weight_product < ALMOST_ZERO) {
535 /* force is zero, skip this molecule */
538 else if (weight_product >= ALMOST_ONE)
/* Presumably the remaining (intermediate) case: the weight product itself. */
544 hybscal = weight_product;
/* Squared distances for the nine site pairs. */
559 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22;
563 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23;
567 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24;
571 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32;
575 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33;
579 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34;
583 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42;
587 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43;
591 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44;
/* Inverse distances via the library sqrt. */
592 rinv22 = 1.0/sqrt(rsq22);
593 rinv23 = 1.0/sqrt(rsq23);
594 rinv24 = 1.0/sqrt(rsq24);
595 rinv32 = 1.0/sqrt(rsq32);
596 rinv33 = 1.0/sqrt(rsq33);
597 rinv34 = 1.0/sqrt(rsq34);
598 rinv42 = 1.0/sqrt(rsq42);
599 rinv43 = 1.0/sqrt(rsq43);
600 rinv44 = 1.0/sqrt(rsq44);
/* Pair 22: scalar force factor is vcoul / r^2 (vcoul and tx/ty/tz are set in lines not shown). */
602 rinvsq = rinv22*rinv22;
605 fscal = (vcoul)*rinvsq;
/* First pair touching j-site 2: start its running force from the stored value. */
613 fjx2 = faction[j3+3] - tx;
614 fjy2 = faction[j3+4] - ty;
615 fjz2 = faction[j3+5] - tz;
/* Pair 23. */
617 rinvsq = rinv23*rinv23;
620 fscal = (vcoul)*rinvsq;
/* First pair touching j-site 3: start its running force from the stored value. */
628 fjx3 = faction[j3+6] - tx;
629 fjy3 = faction[j3+7] - ty;
630 fjz3 = faction[j3+8] - tz;
/* Pair 24. */
632 rinvsq = rinv24*rinv24;
635 fscal = (vcoul)*rinvsq;
/* First pair touching j-site 4: start its running force from the stored value. */
643 fjx4 = faction[j3+9] - tx;
644 fjy4 = faction[j3+10] - ty;
645 fjz4 = faction[j3+11] - tz;
/* Pair 32. */
647 rinvsq = rinv32*rinv32;
650 fscal = (vcoul)*rinvsq;
/* Pair 33. */
662 rinvsq = rinv33*rinv33;
665 fscal = (vcoul)*rinvsq;
/* Pair 34. */
677 rinvsq = rinv34*rinv34;
680 fscal = (vcoul)*rinvsq;
/* Pair 42. */
692 rinvsq = rinv42*rinv42;
695 fscal = (vcoul)*rinvsq;
/* Last pair touching j-site 2: write its force back to faction. */
703 faction[j3+3] = fjx2 - tx;
704 faction[j3+4] = fjy2 - ty;
705 faction[j3+5] = fjz2 - tz;
/* Pair 43. */
707 rinvsq = rinv43*rinv43;
710 fscal = (vcoul)*rinvsq;
/* Last pair touching j-site 3: write its force back to faction. */
718 faction[j3+6] = fjx3 - tx;
719 faction[j3+7] = fjy3 - ty;
720 faction[j3+8] = fjz3 - tz;
/* Pair 44. */
722 rinvsq = rinv44*rinv44;
725 fscal = (vcoul)*rinvsq;
/* Last pair touching j-site 4: write its force back to faction. */
733 faction[j3+9] = fjx4 - tx;
734 faction[j3+10] = fjy4 - ty;
735 faction[j3+11] = fjz4 - tz;
/* Add the summed i-site forces to the force array ... */
738 faction[ii3+3] = faction[ii3+3] + fix2;
739 faction[ii3+4] = faction[ii3+4] + fiy2;
740 faction[ii3+5] = faction[ii3+5] + fiz2;
741 faction[ii3+6] = faction[ii3+6] + fix3;
742 faction[ii3+7] = faction[ii3+7] + fiy3;
743 faction[ii3+8] = faction[ii3+8] + fiz3;
744 faction[ii3+9] = faction[ii3+9] + fix4;
745 faction[ii3+10] = faction[ii3+10] + fiy4;
746 faction[ii3+11] = faction[ii3+11] + fiz4;
/* ... and their per-component total over sites 2..4 to the shift-force array. */
747 fshift[is3] = fshift[is3]+fix2+fix3+fix4;
748 fshift[is3+1] = fshift[is3+1]+fiy2+fiy3+fiy4;
749 fshift[is3+2] = fshift[is3+2]+fiz2+fiz3+fiz4;
/* Add vctot to the Coulomb energy slot indexed by ggid. */
751 Vc[ggid] = Vc[ggid] + vctot;
/* Iteration counters: neighbours processed, then outer entries processed. */
752 ninner = ninner + nj1 - nj0;
755 nouter = nouter + nn1 - nn0;