Merge remote-tracking branch 'origin/release-4-6' into HEAD
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_adress_c / nb_kernel330_c_adress.c
1 /*
2  * Copyright (c) Erik Lindahl, David van der Spoel 2003
3  * 
4  * This file is generated automatically at compile time
5  * by the program mknb in the Gromacs distribution.
6  *
7  * Options used when generation this file:
8  * Language:         c
9  * Precision:        single
10  * Threads:          yes
11  * Software invsqrt: no
12  * PowerPC invsqrt:  no
13  * Prefetch forces:  no
14  * Adress kernel:  yes
15  * Comments:         no
16  */
17 #ifdef HAVE_CONFIG_H
18 #include<config.h>
19 #endif
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
22 #endif
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
25 #include<math.h>
26
27 #include "nb_kernel330_adress.h"
28
29
30
31 /*
32  * Gromacs nonbonded kernel nb_kernel330_adress_cg
33  * Coulomb interaction:     Tabulated
34  * VdW interaction:         Tabulated
35  * water optimization:      No
36  * Calculate forces:        yes
37  */
38 void nb_kernel330_adress_cg(
39                     int *           p_nri,
40                     int *           iinr,
41                     int *           jindex,
42                     int *           jjnr,
43                     int *           shift,
44                     real *         shiftvec,
45                     real *         fshift,
46                     int *           gid,
47                     real *         pos,
48                     real *         faction,
49                     real *         charge,
50                     real *         p_facel,
51                     real *         p_krf,
52                     real *         p_crf,
53                     real *         Vc,
54                     int *           type,
55                     int *           p_ntype,
56                     real *         vdwparam,
57                     real *         Vvdw,
58                     real *         p_tabscale,
59                     real *         VFtab,
60                     real *         invsqrta,
61                     real *         dvda,
62                     real *         p_gbtabscale,
63                     real *         GBtab,
64                     int *           p_nthreads,
65                     int *           count,
66                     void *          mtx,
67                     int *           outeriter,
68                     int *           inneriter,
69                     real           force_cap,
70                     real *         wf)
71 {
72     int           nri,ntype,nthreads;
73     real         facel,krf,crf,tabscale,gbtabscale;
74     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75     int           nn0,nn1,nouter,ninner;
76     real         shX,shY,shZ;
77     real         fscal,tx,ty,tz;
78     real         iq;
79     real         qq,vcoul,vctot;
80     int           nti;
81     int           tj;
82     real         Vvdw6,Vvdwtot;
83     real         Vvdw12;
84     real         r,rt,eps,eps2;
85     int           n0,nnn;
86     real         Y,F,Geps,Heps2,Fp,VV;
87     real         FF;
88     real         fijC;
89     real         fijD,fijR;
90     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
91     real         jx1,jy1,jz1;
92     real         dx11,dy11,dz11,rsq11,rinv11;
93     real         c6,c12;
94     real         weight_cg1, weight_cg2, weight_product;
95     real         hybscal;
96
97     nri              = *p_nri;         
98     ntype            = *p_ntype;       
99     nthreads         = *p_nthreads;    
100     facel            = *p_facel;       
101     krf              = *p_krf;         
102     crf              = *p_crf;         
103     tabscale         = *p_tabscale;    
104     nouter           = 0;              
105     ninner           = 0;              
106     
107     do
108     {
109         #ifdef GMX_THREAD_SHM_FDECOMP
110         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
111         nn0              = *count;         
112         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
113         *count           = nn1;            
114         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
115         if(nn1>nri) nn1=nri;
116         #else
117         nn0 = 0;
118         nn1 = nri;
119         #endif
120         
121         for(n=nn0; (n<nn1); n++)
122         {
123             is3              = 3*shift[n];     
124             shX              = shiftvec[is3];  
125             shY              = shiftvec[is3+1];
126             shZ              = shiftvec[is3+2];
127             nj0              = jindex[n];      
128             nj1              = jindex[n+1];    
129             ii               = iinr[n];        
130             ii3              = 3*ii;           
131             ix1              = shX + pos[ii3+0];
132             iy1              = shY + pos[ii3+1];
133             iz1              = shZ + pos[ii3+2];
134             iq               = facel*charge[ii];
135             nti              = 2*ntype*type[ii];
136             weight_cg1       = wf[ii];         
137             vctot            = 0;              
138             Vvdwtot          = 0;              
139             fix1             = 0;              
140             fiy1             = 0;              
141             fiz1             = 0;              
142             
143             for(k=nj0; (k<nj1); k++)
144             {
145                 jnr              = jjnr[k];        
146                 weight_cg2       = wf[jnr];        
147                 weight_product   = weight_cg1*weight_cg2;
148                 if (weight_product < ALMOST_ZERO) {
149                        hybscal = 1.0;
150                 }
151                 else if (weight_product >= ALMOST_ONE)
152                 {
153                   /* force is zero, skip this molecule */
154                        continue;
155                 }
156                 else
157                 {
158                    hybscal = 1.0 - weight_product;
159                 }
160                 j3               = 3*jnr;          
161                 jx1              = pos[j3+0];      
162                 jy1              = pos[j3+1];      
163                 jz1              = pos[j3+2];      
164                 dx11             = ix1 - jx1;      
165                 dy11             = iy1 - jy1;      
166                 dz11             = iz1 - jz1;      
167                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
168                 rinv11           = 1.0/sqrt(rsq11);
169                 qq               = iq*charge[jnr]; 
170                 tj               = nti+2*type[jnr];
171                 c6               = vdwparam[tj];   
172                 c12              = vdwparam[tj+1]; 
173                 r                = rsq11*rinv11;   
174                 rt               = r*tabscale;     
175                 n0               = rt;             
176                 eps              = rt-n0;          
177                 eps2             = eps*eps;        
178                 nnn              = 12*n0;          
179                 Y                = VFtab[nnn];     
180                 F                = VFtab[nnn+1];   
181                 Geps             = eps*VFtab[nnn+2];
182                 Heps2            = eps2*VFtab[nnn+3];
183                 Fp               = F+Geps+Heps2;   
184                 VV               = Y+eps*Fp;       
185                 FF               = Fp+Geps+2.0*Heps2;
186                 vcoul            = qq*VV;          
187                 fijC             = qq*FF;          
188                 vctot            = vctot + vcoul;  
189                 nnn              = nnn+4;          
190                 Y                = VFtab[nnn];     
191                 F                = VFtab[nnn+1];   
192                 Geps             = eps*VFtab[nnn+2];
193                 Heps2            = eps2*VFtab[nnn+3];
194                 Fp               = F+Geps+Heps2;   
195                 VV               = Y+eps*Fp;       
196                 FF               = Fp+Geps+2.0*Heps2;
197                 Vvdw6            = c6*VV;          
198                 fijD             = c6*FF;          
199                 nnn              = nnn+4;          
200                 Y                = VFtab[nnn];     
201                 F                = VFtab[nnn+1];   
202                 Geps             = eps*VFtab[nnn+2];
203                 Heps2            = eps2*VFtab[nnn+3];
204                 Fp               = F+Geps+Heps2;   
205                 VV               = Y+eps*Fp;       
206                 FF               = Fp+Geps+2.0*Heps2;
207                 Vvdw12           = c12*VV;         
208                 fijR             = c12*FF;         
209                 Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
210                 fscal            = -((fijC+fijD+fijR)*tabscale)*rinv11;
211                 fscal *= hybscal;
212                 tx               = fscal*dx11;     
213                 ty               = fscal*dy11;     
214                 tz               = fscal*dz11;     
215                 fix1             = fix1 + tx;      
216                 fiy1             = fiy1 + ty;      
217                 fiz1             = fiz1 + tz;      
218                 faction[j3+0]    = faction[j3+0] - tx;
219                 faction[j3+1]    = faction[j3+1] - ty;
220                 faction[j3+2]    = faction[j3+2] - tz;
221             }
222             
223             faction[ii3+0]   = faction[ii3+0] + fix1;
224             faction[ii3+1]   = faction[ii3+1] + fiy1;
225             faction[ii3+2]   = faction[ii3+2] + fiz1;
226             fshift[is3]      = fshift[is3]+fix1;
227             fshift[is3+1]    = fshift[is3+1]+fiy1;
228             fshift[is3+2]    = fshift[is3+2]+fiz1;
229             ggid             = gid[n];         
230             Vc[ggid]         = Vc[ggid] + vctot;
231             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
232             ninner           = ninner + nj1 - nj0;
233         }
234         
235         nouter           = nouter + nn1 - nn0;
236     }
237     while (nn1<nri);
238     
239     *outeriter       = nouter;         
240     *inneriter       = ninner;         
241 }
242
243
244
245
246
247 /*
248  * Gromacs nonbonded kernel nb_kernel330_adress_ex
249  * Coulomb interaction:     Tabulated
250  * VdW interaction:         Tabulated
251  * water optimization:      No
252  * Calculate forces:        yes
253  */
254 void nb_kernel330_adress_ex(
255                     int *           p_nri,
256                     int *           iinr,
257                     int *           jindex,
258                     int *           jjnr,
259                     int *           shift,
260                     real *         shiftvec,
261                     real *         fshift,
262                     int *           gid,
263                     real *         pos,
264                     real *         faction,
265                     real *         charge,
266                     real *         p_facel,
267                     real *         p_krf,
268                     real *         p_crf,
269                     real *         Vc,
270                     int *           type,
271                     int *           p_ntype,
272                     real *         vdwparam,
273                     real *         Vvdw,
274                     real *         p_tabscale,
275                     real *         VFtab,
276                     real *         invsqrta,
277                     real *         dvda,
278                     real *         p_gbtabscale,
279                     real *         GBtab,
280                     int *           p_nthreads,
281                     int *           count,
282                     void *          mtx,
283                     int *           outeriter,
284                     int *           inneriter,
285                     real           force_cap,
286                     real *         wf)
287 {
288     int           nri,ntype,nthreads;
289     real         facel,krf,crf,tabscale,gbtabscale;
290     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
291     int           nn0,nn1,nouter,ninner;
292     real         shX,shY,shZ;
293     real         fscal,tx,ty,tz;
294     real         iq;
295     real         qq,vcoul,vctot;
296     int           nti;
297     int           tj;
298     real         Vvdw6,Vvdwtot;
299     real         Vvdw12;
300     real         r,rt,eps,eps2;
301     int           n0,nnn;
302     real         Y,F,Geps,Heps2,Fp,VV;
303     real         FF;
304     real         fijC;
305     real         fijD,fijR;
306     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
307     real         jx1,jy1,jz1;
308     real         dx11,dy11,dz11,rsq11,rinv11;
309     real         c6,c12;
310     real         weight_cg1, weight_cg2, weight_product;
311     real         hybscal;
312
313     nri              = *p_nri;         
314     ntype            = *p_ntype;       
315     nthreads         = *p_nthreads;    
316     facel            = *p_facel;       
317     krf              = *p_krf;         
318     crf              = *p_crf;         
319     tabscale         = *p_tabscale;    
320     nouter           = 0;              
321     ninner           = 0;              
322     
323     do
324     {
325         #ifdef GMX_THREAD_SHM_FDECOMP
326         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
327         nn0              = *count;         
328         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
329         *count           = nn1;            
330         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
331         if(nn1>nri) nn1=nri;
332         #else
333         nn0 = 0;
334         nn1 = nri;
335         #endif
336         
337         for(n=nn0; (n<nn1); n++)
338         {
339             is3              = 3*shift[n];     
340             shX              = shiftvec[is3];  
341             shY              = shiftvec[is3+1];
342             shZ              = shiftvec[is3+2];
343             nj0              = jindex[n];      
344             nj1              = jindex[n+1];    
345             ii               = iinr[n];        
346             ii3              = 3*ii;           
347             ix1              = shX + pos[ii3+0];
348             iy1              = shY + pos[ii3+1];
349             iz1              = shZ + pos[ii3+2];
350             iq               = facel*charge[ii];
351             nti              = 2*ntype*type[ii];
352             weight_cg1       = wf[ii];         
353             vctot            = 0;              
354             Vvdwtot          = 0;              
355             fix1             = 0;              
356             fiy1             = 0;              
357             fiz1             = 0;              
358             
359             for(k=nj0; (k<nj1); k++)
360             {
361                 jnr              = jjnr[k];        
362                 weight_cg2       = wf[jnr];        
363                 weight_product   = weight_cg1*weight_cg2;
364                 if (weight_product < ALMOST_ZERO) {
365                 /* force is zero, skip this molecule */
366                  continue;
367                 }
368                 else if (weight_product >= ALMOST_ONE)
369                 {
370                        hybscal = 1.0;
371                 }
372                 else
373                 {
374                    hybscal = weight_product;
375                 }
376                 j3               = 3*jnr;          
377                 jx1              = pos[j3+0];      
378                 jy1              = pos[j3+1];      
379                 jz1              = pos[j3+2];      
380                 dx11             = ix1 - jx1;      
381                 dy11             = iy1 - jy1;      
382                 dz11             = iz1 - jz1;      
383                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
384                 rinv11           = 1.0/sqrt(rsq11);
385                 qq               = iq*charge[jnr]; 
386                 tj               = nti+2*type[jnr];
387                 c6               = vdwparam[tj];   
388                 c12              = vdwparam[tj+1]; 
389                 r                = rsq11*rinv11;   
390                 rt               = r*tabscale;     
391                 n0               = rt;             
392                 eps              = rt-n0;          
393                 eps2             = eps*eps;        
394                 nnn              = 12*n0;          
395                 Y                = VFtab[nnn];     
396                 F                = VFtab[nnn+1];   
397                 Geps             = eps*VFtab[nnn+2];
398                 Heps2            = eps2*VFtab[nnn+3];
399                 Fp               = F+Geps+Heps2;   
400                 VV               = Y+eps*Fp;       
401                 FF               = Fp+Geps+2.0*Heps2;
402                 vcoul            = qq*VV;          
403                 fijC             = qq*FF;          
404                 vctot            = vctot + vcoul;  
405                 nnn              = nnn+4;          
406                 Y                = VFtab[nnn];     
407                 F                = VFtab[nnn+1];   
408                 Geps             = eps*VFtab[nnn+2];
409                 Heps2            = eps2*VFtab[nnn+3];
410                 Fp               = F+Geps+Heps2;   
411                 VV               = Y+eps*Fp;       
412                 FF               = Fp+Geps+2.0*Heps2;
413                 Vvdw6            = c6*VV;          
414                 fijD             = c6*FF;          
415                 nnn              = nnn+4;          
416                 Y                = VFtab[nnn];     
417                 F                = VFtab[nnn+1];   
418                 Geps             = eps*VFtab[nnn+2];
419                 Heps2            = eps2*VFtab[nnn+3];
420                 Fp               = F+Geps+Heps2;   
421                 VV               = Y+eps*Fp;       
422                 FF               = Fp+Geps+2.0*Heps2;
423                 Vvdw12           = c12*VV;         
424                 fijR             = c12*FF;         
425                 Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
426                 fscal            = -((fijC+fijD+fijR)*tabscale)*rinv11;
427                 fscal *= hybscal;
428                 if(force_cap>0 && (fabs(fscal)> force_cap)){
429                 fscal=force_cap*fscal/fabs(fscal);
430                 }
431                 tx               = fscal*dx11;     
432                 ty               = fscal*dy11;     
433                 tz               = fscal*dz11;     
434                 fix1             = fix1 + tx;      
435                 fiy1             = fiy1 + ty;      
436                 fiz1             = fiz1 + tz;      
437                 faction[j3+0]    = faction[j3+0] - tx;
438                 faction[j3+1]    = faction[j3+1] - ty;
439                 faction[j3+2]    = faction[j3+2] - tz;
440             }
441             
442             faction[ii3+0]   = faction[ii3+0] + fix1;
443             faction[ii3+1]   = faction[ii3+1] + fiy1;
444             faction[ii3+2]   = faction[ii3+2] + fiz1;
445             fshift[is3]      = fshift[is3]+fix1;
446             fshift[is3+1]    = fshift[is3+1]+fiy1;
447             fshift[is3+2]    = fshift[is3+2]+fiz1;
448             ggid             = gid[n];         
449             Vc[ggid]         = Vc[ggid] + vctot;
450             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
451             ninner           = ninner + nj1 - nj0;
452         }
453         
454         nouter           = nouter + nn1 - nn0;
455     }
456     while (nn1<nri);
457     
458     *outeriter       = nouter;         
459     *inneriter       = ninner;         
460 }
461
462