Merge remote-tracking branch 'origin/release-4-6' into HEAD
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_adress_c / nb_kernel030_c_adress.c
1 /*
2  * Copyright (c) Erik Lindahl, David van der Spoel 2003
3  * 
4  * This file is generated automatically at compile time
5  * by the program mknb in the Gromacs distribution.
6  *
7  * Options used when generation this file:
8  * Language:         c
9  * Precision:        single
10  * Threads:          yes
11  * Software invsqrt: no
12  * PowerPC invsqrt:  no
13  * Prefetch forces:  no
14  * Adress kernel:  yes
15  * Comments:         no
16  */
17 #ifdef HAVE_CONFIG_H
18 #include<config.h>
19 #endif
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
22 #endif
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
25 #include<math.h>
26
27 #include "nb_kernel030_adress.h"
28
29
30
31 /*
32  * Gromacs nonbonded kernel nb_kernel030_adress_cg
33  * Coulomb interaction:     Not calculated
34  * VdW interaction:         Tabulated
35  * water optimization:      No
36  * Calculate forces:        yes
37  */
38 void nb_kernel030_adress_cg(
39                     int *           p_nri,
40                     int *           iinr,
41                     int *           jindex,
42                     int *           jjnr,
43                     int *           shift,
44                     real *         shiftvec,
45                     real *         fshift,
46                     int *           gid,
47                     real *         pos,
48                     real *         faction,
49                     real *         charge,
50                     real *         p_facel,
51                     real *         p_krf,
52                     real *         p_crf,
53                     real *         Vc,
54                     int *           type,
55                     int *           p_ntype,
56                     real *         vdwparam,
57                     real *         Vvdw,
58                     real *         p_tabscale,
59                     real *         VFtab,
60                     real *         invsqrta,
61                     real *         dvda,
62                     real *         p_gbtabscale,
63                     real *         GBtab,
64                     int *           p_nthreads,
65                     int *           count,
66                     void *          mtx,
67                     int *           outeriter,
68                     int *           inneriter,
69                     real           force_cap,
70                     real *         wf)
71 {
72     int           nri,ntype,nthreads;
73     real         facel,krf,crf,tabscale,gbtabscale;
74     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75     int           nn0,nn1,nouter,ninner;
76     real         shX,shY,shZ;
77     real         fscal,tx,ty,tz;
78     int           nti;
79     int           tj;
80     real         Vvdw6,Vvdwtot;
81     real         Vvdw12;
82     real         r,rt,eps,eps2;
83     int           n0,nnn;
84     real         Y,F,Geps,Heps2,Fp,VV;
85     real         FF;
86     real         fijD,fijR;
87     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
88     real         jx1,jy1,jz1;
89     real         dx11,dy11,dz11,rsq11,rinv11;
90     real         c6,c12;
91     real         weight_cg1, weight_cg2, weight_product;
92     real         hybscal;
93
94     nri              = *p_nri;         
95     ntype            = *p_ntype;       
96     nthreads         = *p_nthreads;    
97     facel            = *p_facel;       
98     krf              = *p_krf;         
99     crf              = *p_crf;         
100     tabscale         = *p_tabscale;    
101     nouter           = 0;              
102     ninner           = 0;              
103     
104     do
105     {
106         #ifdef GMX_THREAD_SHM_FDECOMP
107         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
108         nn0              = *count;         
109         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
110         *count           = nn1;            
111         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
112         if(nn1>nri) nn1=nri;
113         #else
114         nn0 = 0;
115         nn1 = nri;
116         #endif
117         
118         for(n=nn0; (n<nn1); n++)
119         {
120             is3              = 3*shift[n];     
121             shX              = shiftvec[is3];  
122             shY              = shiftvec[is3+1];
123             shZ              = shiftvec[is3+2];
124             nj0              = jindex[n];      
125             nj1              = jindex[n+1];    
126             ii               = iinr[n];        
127             ii3              = 3*ii;           
128             ix1              = shX + pos[ii3+0];
129             iy1              = shY + pos[ii3+1];
130             iz1              = shZ + pos[ii3+2];
131             nti              = 2*ntype*type[ii];
132             weight_cg1       = wf[ii];         
133             Vvdwtot          = 0;              
134             fix1             = 0;              
135             fiy1             = 0;              
136             fiz1             = 0;              
137             
138             for(k=nj0; (k<nj1); k++)
139             {
140                 jnr              = jjnr[k];        
141                 weight_cg2       = wf[jnr];        
142                 weight_product   = weight_cg1*weight_cg2;
143                 if (weight_product < ALMOST_ZERO) {
144                        hybscal = 1.0;
145                 }
146                 else if (weight_product >= ALMOST_ONE)
147                 {
148                   /* force is zero, skip this molecule */
149                        continue;
150                 }
151                 else
152                 {
153                    hybscal = 1.0 - weight_product;
154                 }
155                 j3               = 3*jnr;          
156                 jx1              = pos[j3+0];      
157                 jy1              = pos[j3+1];      
158                 jz1              = pos[j3+2];      
159                 dx11             = ix1 - jx1;      
160                 dy11             = iy1 - jy1;      
161                 dz11             = iz1 - jz1;      
162                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
163                 rinv11           = 1.0/sqrt(rsq11);
164                 tj               = nti+2*type[jnr];
165                 c6               = vdwparam[tj];   
166                 c12              = vdwparam[tj+1]; 
167                 r                = rsq11*rinv11;   
168                 rt               = r*tabscale;     
169                 n0               = rt;             
170                 eps              = rt-n0;          
171                 eps2             = eps*eps;        
172                 nnn              = 8*n0;           
173                 Y                = VFtab[nnn];     
174                 F                = VFtab[nnn+1];   
175                 Geps             = eps*VFtab[nnn+2];
176                 Heps2            = eps2*VFtab[nnn+3];
177                 Fp               = F+Geps+Heps2;   
178                 VV               = Y+eps*Fp;       
179                 FF               = Fp+Geps+2.0*Heps2;
180                 Vvdw6            = c6*VV;          
181                 fijD             = c6*FF;          
182                 nnn              = nnn+4;          
183                 Y                = VFtab[nnn];     
184                 F                = VFtab[nnn+1];   
185                 Geps             = eps*VFtab[nnn+2];
186                 Heps2            = eps2*VFtab[nnn+3];
187                 Fp               = F+Geps+Heps2;   
188                 VV               = Y+eps*Fp;       
189                 FF               = Fp+Geps+2.0*Heps2;
190                 Vvdw12           = c12*VV;         
191                 fijR             = c12*FF;         
192                 Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
193                 fscal            = -((fijD+fijR)*tabscale)*rinv11;
194                 fscal *= hybscal;
195                 tx               = fscal*dx11;     
196                 ty               = fscal*dy11;     
197                 tz               = fscal*dz11;     
198                 fix1             = fix1 + tx;      
199                 fiy1             = fiy1 + ty;      
200                 fiz1             = fiz1 + tz;      
201                 faction[j3+0]    = faction[j3+0] - tx;
202                 faction[j3+1]    = faction[j3+1] - ty;
203                 faction[j3+2]    = faction[j3+2] - tz;
204             }
205             
206             faction[ii3+0]   = faction[ii3+0] + fix1;
207             faction[ii3+1]   = faction[ii3+1] + fiy1;
208             faction[ii3+2]   = faction[ii3+2] + fiz1;
209             fshift[is3]      = fshift[is3]+fix1;
210             fshift[is3+1]    = fshift[is3+1]+fiy1;
211             fshift[is3+2]    = fshift[is3+2]+fiz1;
212             ggid             = gid[n];         
213             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
214             ninner           = ninner + nj1 - nj0;
215         }
216         
217         nouter           = nouter + nn1 - nn0;
218     }
219     while (nn1<nri);
220     
221     *outeriter       = nouter;         
222     *inneriter       = ninner;         
223 }
224
225
226
227
228
229 /*
230  * Gromacs nonbonded kernel nb_kernel030_adress_ex
231  * Coulomb interaction:     Not calculated
232  * VdW interaction:         Tabulated
233  * water optimization:      No
234  * Calculate forces:        yes
235  */
236 void nb_kernel030_adress_ex(
237                     int *           p_nri,
238                     int *           iinr,
239                     int *           jindex,
240                     int *           jjnr,
241                     int *           shift,
242                     real *         shiftvec,
243                     real *         fshift,
244                     int *           gid,
245                     real *         pos,
246                     real *         faction,
247                     real *         charge,
248                     real *         p_facel,
249                     real *         p_krf,
250                     real *         p_crf,
251                     real *         Vc,
252                     int *           type,
253                     int *           p_ntype,
254                     real *         vdwparam,
255                     real *         Vvdw,
256                     real *         p_tabscale,
257                     real *         VFtab,
258                     real *         invsqrta,
259                     real *         dvda,
260                     real *         p_gbtabscale,
261                     real *         GBtab,
262                     int *           p_nthreads,
263                     int *           count,
264                     void *          mtx,
265                     int *           outeriter,
266                     int *           inneriter,
267                     real           force_cap,
268                     real *         wf)
269 {
270     int           nri,ntype,nthreads;
271     real         facel,krf,crf,tabscale,gbtabscale;
272     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
273     int           nn0,nn1,nouter,ninner;
274     real         shX,shY,shZ;
275     real         fscal,tx,ty,tz;
276     int           nti;
277     int           tj;
278     real         Vvdw6,Vvdwtot;
279     real         Vvdw12;
280     real         r,rt,eps,eps2;
281     int           n0,nnn;
282     real         Y,F,Geps,Heps2,Fp,VV;
283     real         FF;
284     real         fijD,fijR;
285     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
286     real         jx1,jy1,jz1;
287     real         dx11,dy11,dz11,rsq11,rinv11;
288     real         c6,c12;
289     real         weight_cg1, weight_cg2, weight_product;
290     real         hybscal;
291
292     nri              = *p_nri;         
293     ntype            = *p_ntype;       
294     nthreads         = *p_nthreads;    
295     facel            = *p_facel;       
296     krf              = *p_krf;         
297     crf              = *p_crf;         
298     tabscale         = *p_tabscale;    
299     nouter           = 0;              
300     ninner           = 0;              
301     
302     do
303     {
304         #ifdef GMX_THREAD_SHM_FDECOMP
305         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
306         nn0              = *count;         
307         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
308         *count           = nn1;            
309         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
310         if(nn1>nri) nn1=nri;
311         #else
312         nn0 = 0;
313         nn1 = nri;
314         #endif
315         
316         for(n=nn0; (n<nn1); n++)
317         {
318             is3              = 3*shift[n];     
319             shX              = shiftvec[is3];  
320             shY              = shiftvec[is3+1];
321             shZ              = shiftvec[is3+2];
322             nj0              = jindex[n];      
323             nj1              = jindex[n+1];    
324             ii               = iinr[n];        
325             ii3              = 3*ii;           
326             ix1              = shX + pos[ii3+0];
327             iy1              = shY + pos[ii3+1];
328             iz1              = shZ + pos[ii3+2];
329             nti              = 2*ntype*type[ii];
330             weight_cg1       = wf[ii];         
331             Vvdwtot          = 0;              
332             fix1             = 0;              
333             fiy1             = 0;              
334             fiz1             = 0;              
335             
336             for(k=nj0; (k<nj1); k++)
337             {
338                 jnr              = jjnr[k];        
339                 weight_cg2       = wf[jnr];        
340                 weight_product   = weight_cg1*weight_cg2;
341                 if (weight_product < ALMOST_ZERO) {
342                 /* force is zero, skip this molecule */
343                  continue;
344                 }
345                 else if (weight_product >= ALMOST_ONE)
346                 {
347                        hybscal = 1.0;
348                 }
349                 else
350                 {
351                    hybscal = weight_product;
352                 }
353                 j3               = 3*jnr;          
354                 jx1              = pos[j3+0];      
355                 jy1              = pos[j3+1];      
356                 jz1              = pos[j3+2];      
357                 dx11             = ix1 - jx1;      
358                 dy11             = iy1 - jy1;      
359                 dz11             = iz1 - jz1;      
360                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
361                 rinv11           = 1.0/sqrt(rsq11);
362                 tj               = nti+2*type[jnr];
363                 c6               = vdwparam[tj];   
364                 c12              = vdwparam[tj+1]; 
365                 r                = rsq11*rinv11;   
366                 rt               = r*tabscale;     
367                 n0               = rt;             
368                 eps              = rt-n0;          
369                 eps2             = eps*eps;        
370                 nnn              = 8*n0;           
371                 Y                = VFtab[nnn];     
372                 F                = VFtab[nnn+1];   
373                 Geps             = eps*VFtab[nnn+2];
374                 Heps2            = eps2*VFtab[nnn+3];
375                 Fp               = F+Geps+Heps2;   
376                 VV               = Y+eps*Fp;       
377                 FF               = Fp+Geps+2.0*Heps2;
378                 Vvdw6            = c6*VV;          
379                 fijD             = c6*FF;          
380                 nnn              = nnn+4;          
381                 Y                = VFtab[nnn];     
382                 F                = VFtab[nnn+1];   
383                 Geps             = eps*VFtab[nnn+2];
384                 Heps2            = eps2*VFtab[nnn+3];
385                 Fp               = F+Geps+Heps2;   
386                 VV               = Y+eps*Fp;       
387                 FF               = Fp+Geps+2.0*Heps2;
388                 Vvdw12           = c12*VV;         
389                 fijR             = c12*FF;         
390                 Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
391                 fscal            = -((fijD+fijR)*tabscale)*rinv11;
392                 fscal *= hybscal;
393                 if(force_cap>0 && (fabs(fscal)> force_cap)){
394                 fscal=force_cap*fscal/fabs(fscal);
395                 }
396                 tx               = fscal*dx11;     
397                 ty               = fscal*dy11;     
398                 tz               = fscal*dz11;     
399                 fix1             = fix1 + tx;      
400                 fiy1             = fiy1 + ty;      
401                 fiz1             = fiz1 + tz;      
402                 faction[j3+0]    = faction[j3+0] - tx;
403                 faction[j3+1]    = faction[j3+1] - ty;
404                 faction[j3+2]    = faction[j3+2] - tz;
405             }
406             
407             faction[ii3+0]   = faction[ii3+0] + fix1;
408             faction[ii3+1]   = faction[ii3+1] + fiy1;
409             faction[ii3+2]   = faction[ii3+2] + fiz1;
410             fshift[is3]      = fshift[is3]+fix1;
411             fshift[is3+1]    = fshift[is3+1]+fiy1;
412             fshift[is3+2]    = fshift[is3+2]+fiz1;
413             ggid             = gid[n];         
414             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
415             ninner           = ninner + nj1 - nj0;
416         }
417         
418         nouter           = nouter + nn1 - nn0;
419     }
420     while (nn1<nri);
421     
422     *outeriter       = nouter;         
423     *inneriter       = ninner;         
424 }
425
426