Merge remote-tracking branch 'origin/release-4-6' into HEAD
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_adress_c / nb_kernel300_c_adress.c
1 /*
2  * Copyright (c) Erik Lindahl, David van der Spoel 2003
3  * 
4  * This file is generated automatically at compile time
5  * by the program mknb in the Gromacs distribution.
6  *
7  * Options used when generation this file:
8  * Language:         c
9  * Precision:        single
10  * Threads:          yes
11  * Software invsqrt: no
12  * PowerPC invsqrt:  no
13  * Prefetch forces:  no
14  * Adress kernel:  yes
15  * Comments:         no
16  */
17 #ifdef HAVE_CONFIG_H
18 #include<config.h>
19 #endif
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
22 #endif
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
25 #include<math.h>
26
27 #include "nb_kernel300_adress.h"
28
29
30
31 /*
32  * Gromacs nonbonded kernel nb_kernel300_adress_cg
33  * Coulomb interaction:     Tabulated
34  * VdW interaction:         Not calculated
35  * water optimization:      No
36  * Calculate forces:        yes
37  */
38 void nb_kernel300_adress_cg(
39                     int *           p_nri,
40                     int *           iinr,
41                     int *           jindex,
42                     int *           jjnr,
43                     int *           shift,
44                     real *         shiftvec,
45                     real *         fshift,
46                     int *           gid,
47                     real *         pos,
48                     real *         faction,
49                     real *         charge,
50                     real *         p_facel,
51                     real *         p_krf,
52                     real *         p_crf,
53                     real *         Vc,
54                     int *           type,
55                     int *           p_ntype,
56                     real *         vdwparam,
57                     real *         Vvdw,
58                     real *         p_tabscale,
59                     real *         VFtab,
60                     real *         invsqrta,
61                     real *         dvda,
62                     real *         p_gbtabscale,
63                     real *         GBtab,
64                     int *           p_nthreads,
65                     int *           count,
66                     void *          mtx,
67                     int *           outeriter,
68                     int *           inneriter,
69                     real           force_cap,
70                     real *         wf)
71 {
72     int           nri,ntype,nthreads;
73     real         facel,krf,crf,tabscale,gbtabscale;
74     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75     int           nn0,nn1,nouter,ninner;
76     real         shX,shY,shZ;
77     real         fscal,tx,ty,tz;
78     real         iq;
79     real         qq,vcoul,vctot;
80     real         r,rt,eps,eps2;
81     int           n0,nnn;
82     real         Y,F,Geps,Heps2,Fp,VV;
83     real         FF;
84     real         fijC;
85     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
86     real         jx1,jy1,jz1;
87     real         dx11,dy11,dz11,rsq11,rinv11;
88     real         weight_cg1, weight_cg2, weight_product;
89     real         hybscal;
90
91     nri              = *p_nri;         
92     ntype            = *p_ntype;       
93     nthreads         = *p_nthreads;    
94     facel            = *p_facel;       
95     krf              = *p_krf;         
96     crf              = *p_crf;         
97     tabscale         = *p_tabscale;    
98     nouter           = 0;              
99     ninner           = 0;              
100     
101     do
102     {
103         #ifdef GMX_THREAD_SHM_FDECOMP
104         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
105         nn0              = *count;         
106         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
107         *count           = nn1;            
108         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
109         if(nn1>nri) nn1=nri;
110         #else
111         nn0 = 0;
112         nn1 = nri;
113         #endif
114         
115         for(n=nn0; (n<nn1); n++)
116         {
117             is3              = 3*shift[n];     
118             shX              = shiftvec[is3];  
119             shY              = shiftvec[is3+1];
120             shZ              = shiftvec[is3+2];
121             nj0              = jindex[n];      
122             nj1              = jindex[n+1];    
123             ii               = iinr[n];        
124             ii3              = 3*ii;           
125             ix1              = shX + pos[ii3+0];
126             iy1              = shY + pos[ii3+1];
127             iz1              = shZ + pos[ii3+2];
128             iq               = facel*charge[ii];
129             weight_cg1       = wf[ii];         
130             vctot            = 0;              
131             fix1             = 0;              
132             fiy1             = 0;              
133             fiz1             = 0;              
134             
135             for(k=nj0; (k<nj1); k++)
136             {
137                 jnr              = jjnr[k];        
138                 weight_cg2       = wf[jnr];        
139                 weight_product   = weight_cg1*weight_cg2;
140                 if (weight_product < ALMOST_ZERO) {
141                        hybscal = 1.0;
142                 }
143                 else if (weight_product >= ALMOST_ONE)
144                 {
145                   /* force is zero, skip this molecule */
146                        continue;
147                 }
148                 else
149                 {
150                    hybscal = 1.0 - weight_product;
151                 }
152                 j3               = 3*jnr;          
153                 jx1              = pos[j3+0];      
154                 jy1              = pos[j3+1];      
155                 jz1              = pos[j3+2];      
156                 dx11             = ix1 - jx1;      
157                 dy11             = iy1 - jy1;      
158                 dz11             = iz1 - jz1;      
159                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
160                 rinv11           = 1.0/sqrt(rsq11);
161                 qq               = iq*charge[jnr]; 
162                 r                = rsq11*rinv11;   
163                 rt               = r*tabscale;     
164                 n0               = rt;             
165                 eps              = rt-n0;          
166                 eps2             = eps*eps;        
167                 nnn              = 4*n0;           
168                 Y                = VFtab[nnn];     
169                 F                = VFtab[nnn+1];   
170                 Geps             = eps*VFtab[nnn+2];
171                 Heps2            = eps2*VFtab[nnn+3];
172                 Fp               = F+Geps+Heps2;   
173                 VV               = Y+eps*Fp;       
174                 FF               = Fp+Geps+2.0*Heps2;
175                 vcoul            = qq*VV;          
176                 fijC             = qq*FF;          
177                 vctot            = vctot + vcoul;  
178                 fscal            = -((fijC)*tabscale)*rinv11;
179                 fscal *= hybscal;
180                 tx               = fscal*dx11;     
181                 ty               = fscal*dy11;     
182                 tz               = fscal*dz11;     
183                 fix1             = fix1 + tx;      
184                 fiy1             = fiy1 + ty;      
185                 fiz1             = fiz1 + tz;      
186                 faction[j3+0]    = faction[j3+0] - tx;
187                 faction[j3+1]    = faction[j3+1] - ty;
188                 faction[j3+2]    = faction[j3+2] - tz;
189             }
190             
191             faction[ii3+0]   = faction[ii3+0] + fix1;
192             faction[ii3+1]   = faction[ii3+1] + fiy1;
193             faction[ii3+2]   = faction[ii3+2] + fiz1;
194             fshift[is3]      = fshift[is3]+fix1;
195             fshift[is3+1]    = fshift[is3+1]+fiy1;
196             fshift[is3+2]    = fshift[is3+2]+fiz1;
197             ggid             = gid[n];         
198             Vc[ggid]         = Vc[ggid] + vctot;
199             ninner           = ninner + nj1 - nj0;
200         }
201         
202         nouter           = nouter + nn1 - nn0;
203     }
204     while (nn1<nri);
205     
206     *outeriter       = nouter;         
207     *inneriter       = ninner;         
208 }
209
210
211
212
213
214 /*
215  * Gromacs nonbonded kernel nb_kernel300_adress_ex
216  * Coulomb interaction:     Tabulated
217  * VdW interaction:         Not calculated
218  * water optimization:      No
219  * Calculate forces:        yes
220  */
221 void nb_kernel300_adress_ex(
222                     int *           p_nri,
223                     int *           iinr,
224                     int *           jindex,
225                     int *           jjnr,
226                     int *           shift,
227                     real *         shiftvec,
228                     real *         fshift,
229                     int *           gid,
230                     real *         pos,
231                     real *         faction,
232                     real *         charge,
233                     real *         p_facel,
234                     real *         p_krf,
235                     real *         p_crf,
236                     real *         Vc,
237                     int *           type,
238                     int *           p_ntype,
239                     real *         vdwparam,
240                     real *         Vvdw,
241                     real *         p_tabscale,
242                     real *         VFtab,
243                     real *         invsqrta,
244                     real *         dvda,
245                     real *         p_gbtabscale,
246                     real *         GBtab,
247                     int *           p_nthreads,
248                     int *           count,
249                     void *          mtx,
250                     int *           outeriter,
251                     int *           inneriter,
252                     real           force_cap,
253                     real *         wf)
254 {
255     int           nri,ntype,nthreads;
256     real         facel,krf,crf,tabscale,gbtabscale;
257     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
258     int           nn0,nn1,nouter,ninner;
259     real         shX,shY,shZ;
260     real         fscal,tx,ty,tz;
261     real         iq;
262     real         qq,vcoul,vctot;
263     real         r,rt,eps,eps2;
264     int           n0,nnn;
265     real         Y,F,Geps,Heps2,Fp,VV;
266     real         FF;
267     real         fijC;
268     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
269     real         jx1,jy1,jz1;
270     real         dx11,dy11,dz11,rsq11,rinv11;
271     real         weight_cg1, weight_cg2, weight_product;
272     real         hybscal;
273
274     nri              = *p_nri;         
275     ntype            = *p_ntype;       
276     nthreads         = *p_nthreads;    
277     facel            = *p_facel;       
278     krf              = *p_krf;         
279     crf              = *p_crf;         
280     tabscale         = *p_tabscale;    
281     nouter           = 0;              
282     ninner           = 0;              
283     
284     do
285     {
286         #ifdef GMX_THREAD_SHM_FDECOMP
287         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
288         nn0              = *count;         
289         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
290         *count           = nn1;            
291         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
292         if(nn1>nri) nn1=nri;
293         #else
294         nn0 = 0;
295         nn1 = nri;
296         #endif
297         
298         for(n=nn0; (n<nn1); n++)
299         {
300             is3              = 3*shift[n];     
301             shX              = shiftvec[is3];  
302             shY              = shiftvec[is3+1];
303             shZ              = shiftvec[is3+2];
304             nj0              = jindex[n];      
305             nj1              = jindex[n+1];    
306             ii               = iinr[n];        
307             ii3              = 3*ii;           
308             ix1              = shX + pos[ii3+0];
309             iy1              = shY + pos[ii3+1];
310             iz1              = shZ + pos[ii3+2];
311             iq               = facel*charge[ii];
312             weight_cg1       = wf[ii];         
313             vctot            = 0;              
314             fix1             = 0;              
315             fiy1             = 0;              
316             fiz1             = 0;              
317             
318             for(k=nj0; (k<nj1); k++)
319             {
320                 jnr              = jjnr[k];        
321                 weight_cg2       = wf[jnr];        
322                 weight_product   = weight_cg1*weight_cg2;
323                 if (weight_product < ALMOST_ZERO) {
324                 /* force is zero, skip this molecule */
325                  continue;
326                 }
327                 else if (weight_product >= ALMOST_ONE)
328                 {
329                        hybscal = 1.0;
330                 }
331                 else
332                 {
333                    hybscal = weight_product;
334                 }
335                 j3               = 3*jnr;          
336                 jx1              = pos[j3+0];      
337                 jy1              = pos[j3+1];      
338                 jz1              = pos[j3+2];      
339                 dx11             = ix1 - jx1;      
340                 dy11             = iy1 - jy1;      
341                 dz11             = iz1 - jz1;      
342                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
343                 rinv11           = 1.0/sqrt(rsq11);
344                 qq               = iq*charge[jnr]; 
345                 r                = rsq11*rinv11;   
346                 rt               = r*tabscale;     
347                 n0               = rt;             
348                 eps              = rt-n0;          
349                 eps2             = eps*eps;        
350                 nnn              = 4*n0;           
351                 Y                = VFtab[nnn];     
352                 F                = VFtab[nnn+1];   
353                 Geps             = eps*VFtab[nnn+2];
354                 Heps2            = eps2*VFtab[nnn+3];
355                 Fp               = F+Geps+Heps2;   
356                 VV               = Y+eps*Fp;       
357                 FF               = Fp+Geps+2.0*Heps2;
358                 vcoul            = qq*VV;          
359                 fijC             = qq*FF;          
360                 vctot            = vctot + vcoul;  
361                 fscal            = -((fijC)*tabscale)*rinv11;
362                 fscal *= hybscal;
363                 tx               = fscal*dx11;     
364                 ty               = fscal*dy11;     
365                 tz               = fscal*dz11;     
366                 fix1             = fix1 + tx;      
367                 fiy1             = fiy1 + ty;      
368                 fiz1             = fiz1 + tz;      
369                 faction[j3+0]    = faction[j3+0] - tx;
370                 faction[j3+1]    = faction[j3+1] - ty;
371                 faction[j3+2]    = faction[j3+2] - tz;
372             }
373             
374             faction[ii3+0]   = faction[ii3+0] + fix1;
375             faction[ii3+1]   = faction[ii3+1] + fiy1;
376             faction[ii3+2]   = faction[ii3+2] + fiz1;
377             fshift[is3]      = fshift[is3]+fix1;
378             fshift[is3+1]    = fshift[is3+1]+fiy1;
379             fshift[is3+2]    = fshift[is3+2]+fiz1;
380             ggid             = gid[n];         
381             Vc[ggid]         = Vc[ggid] + vctot;
382             ninner           = ninner + nj1 - nj0;
383         }
384         
385         nouter           = nouter + nn1 - nn0;
386     }
387     while (nn1<nri);
388     
389     *outeriter       = nouter;         
390     *inneriter       = ninner;         
391 }
392
393