Merge remote-tracking branch 'origin/release-4-6' into HEAD
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_adress_c / nb_kernel230_c_adress.c
1 /*
2  * Copyright (c) Erik Lindahl, David van der Spoel 2003
3  * 
4  * This file is generated automatically at compile time
5  * by the program mknb in the Gromacs distribution.
6  *
7  * Options used when generation this file:
8  * Language:         c
9  * Precision:        single
10  * Threads:          yes
11  * Software invsqrt: no
12  * PowerPC invsqrt:  no
13  * Prefetch forces:  no
14  * Adress kernel:  yes
15  * Comments:         no
16  */
17 #ifdef HAVE_CONFIG_H
18 #include<config.h>
19 #endif
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
22 #endif
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
25 #include<math.h>
26
27 #include "nb_kernel230_adress.h"
28
29
30
31 /*
32  * Gromacs nonbonded kernel nb_kernel230_adress_cg
33  * Coulomb interaction:     Reaction field
34  * VdW interaction:         Tabulated
35  * water optimization:      No
36  * Calculate forces:        yes
37  */
38 void nb_kernel230_adress_cg(
39                     int *           p_nri,
40                     int *           iinr,
41                     int *           jindex,
42                     int *           jjnr,
43                     int *           shift,
44                     real *         shiftvec,
45                     real *         fshift,
46                     int *           gid,
47                     real *         pos,
48                     real *         faction,
49                     real *         charge,
50                     real *         p_facel,
51                     real *         p_krf,
52                     real *         p_crf,
53                     real *         Vc,
54                     int *           type,
55                     int *           p_ntype,
56                     real *         vdwparam,
57                     real *         Vvdw,
58                     real *         p_tabscale,
59                     real *         VFtab,
60                     real *         invsqrta,
61                     real *         dvda,
62                     real *         p_gbtabscale,
63                     real *         GBtab,
64                     int *           p_nthreads,
65                     int *           count,
66                     void *          mtx,
67                     int *           outeriter,
68                     int *           inneriter,
69                     real           force_cap,
70                     real *         wf)
71 {
72     int           nri,ntype,nthreads;
73     real         facel,krf,crf,tabscale,gbtabscale;
74     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75     int           nn0,nn1,nouter,ninner;
76     real         shX,shY,shZ;
77     real         fscal,tx,ty,tz;
78     real         rinvsq;
79     real         iq;
80     real         qq,vcoul,vctot;
81     int           nti;
82     int           tj;
83     real         Vvdw6,Vvdwtot;
84     real         Vvdw12;
85     real         r,rt,eps,eps2;
86     int           n0,nnn;
87     real         Y,F,Geps,Heps2,Fp,VV;
88     real         FF;
89     real         fijD,fijR;
90     real         krsq;
91     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
92     real         jx1,jy1,jz1;
93     real         dx11,dy11,dz11,rsq11,rinv11;
94     real         c6,c12;
95     real         weight_cg1, weight_cg2, weight_product;
96     real         hybscal;
97
98     nri              = *p_nri;         
99     ntype            = *p_ntype;       
100     nthreads         = *p_nthreads;    
101     facel            = *p_facel;       
102     krf              = *p_krf;         
103     crf              = *p_crf;         
104     tabscale         = *p_tabscale;    
105     nouter           = 0;              
106     ninner           = 0;              
107     
108     do
109     {
110         #ifdef GMX_THREAD_SHM_FDECOMP
111         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
112         nn0              = *count;         
113         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
114         *count           = nn1;            
115         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
116         if(nn1>nri) nn1=nri;
117         #else
118         nn0 = 0;
119         nn1 = nri;
120         #endif
121         
122         for(n=nn0; (n<nn1); n++)
123         {
124             is3              = 3*shift[n];     
125             shX              = shiftvec[is3];  
126             shY              = shiftvec[is3+1];
127             shZ              = shiftvec[is3+2];
128             nj0              = jindex[n];      
129             nj1              = jindex[n+1];    
130             ii               = iinr[n];        
131             ii3              = 3*ii;           
132             ix1              = shX + pos[ii3+0];
133             iy1              = shY + pos[ii3+1];
134             iz1              = shZ + pos[ii3+2];
135             iq               = facel*charge[ii];
136             nti              = 2*ntype*type[ii];
137             weight_cg1       = wf[ii];         
138             vctot            = 0;              
139             Vvdwtot          = 0;              
140             fix1             = 0;              
141             fiy1             = 0;              
142             fiz1             = 0;              
143             
144             for(k=nj0; (k<nj1); k++)
145             {
146                 jnr              = jjnr[k];        
147                 weight_cg2       = wf[jnr];        
148                 weight_product   = weight_cg1*weight_cg2;
149                 if (weight_product < ALMOST_ZERO) {
150                        hybscal = 1.0;
151                 }
152                 else if (weight_product >= ALMOST_ONE)
153                 {
154                   /* force is zero, skip this molecule */
155                        continue;
156                 }
157                 else
158                 {
159                    hybscal = 1.0 - weight_product;
160                 }
161                 j3               = 3*jnr;          
162                 jx1              = pos[j3+0];      
163                 jy1              = pos[j3+1];      
164                 jz1              = pos[j3+2];      
165                 dx11             = ix1 - jx1;      
166                 dy11             = iy1 - jy1;      
167                 dz11             = iz1 - jz1;      
168                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
169                 rinv11           = 1.0/sqrt(rsq11);
170                 qq               = iq*charge[jnr]; 
171                 tj               = nti+2*type[jnr];
172                 c6               = vdwparam[tj];   
173                 c12              = vdwparam[tj+1]; 
174                 rinvsq           = rinv11*rinv11;  
175                 krsq             = krf*rsq11;      
176                 vcoul            = qq*(rinv11+krsq-crf);
177                 vctot            = vctot+vcoul;    
178                 r                = rsq11*rinv11;   
179                 rt               = r*tabscale;     
180                 n0               = rt;             
181                 eps              = rt-n0;          
182                 eps2             = eps*eps;        
183                 nnn              = 8*n0;           
184                 Y                = VFtab[nnn];     
185                 F                = VFtab[nnn+1];   
186                 Geps             = eps*VFtab[nnn+2];
187                 Heps2            = eps2*VFtab[nnn+3];
188                 Fp               = F+Geps+Heps2;   
189                 VV               = Y+eps*Fp;       
190                 FF               = Fp+Geps+2.0*Heps2;
191                 Vvdw6            = c6*VV;          
192                 fijD             = c6*FF;          
193                 nnn              = nnn+4;          
194                 Y                = VFtab[nnn];     
195                 F                = VFtab[nnn+1];   
196                 Geps             = eps*VFtab[nnn+2];
197                 Heps2            = eps2*VFtab[nnn+3];
198                 Fp               = F+Geps+Heps2;   
199                 VV               = Y+eps*Fp;       
200                 FF               = Fp+Geps+2.0*Heps2;
201                 Vvdw12           = c12*VV;         
202                 fijR             = c12*FF;         
203                 Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
204                 fscal            = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
205                 fscal *= hybscal;
206                 tx               = fscal*dx11;     
207                 ty               = fscal*dy11;     
208                 tz               = fscal*dz11;     
209                 fix1             = fix1 + tx;      
210                 fiy1             = fiy1 + ty;      
211                 fiz1             = fiz1 + tz;      
212                 faction[j3+0]    = faction[j3+0] - tx;
213                 faction[j3+1]    = faction[j3+1] - ty;
214                 faction[j3+2]    = faction[j3+2] - tz;
215             }
216             
217             faction[ii3+0]   = faction[ii3+0] + fix1;
218             faction[ii3+1]   = faction[ii3+1] + fiy1;
219             faction[ii3+2]   = faction[ii3+2] + fiz1;
220             fshift[is3]      = fshift[is3]+fix1;
221             fshift[is3+1]    = fshift[is3+1]+fiy1;
222             fshift[is3+2]    = fshift[is3+2]+fiz1;
223             ggid             = gid[n];         
224             Vc[ggid]         = Vc[ggid] + vctot;
225             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
226             ninner           = ninner + nj1 - nj0;
227         }
228         
229         nouter           = nouter + nn1 - nn0;
230     }
231     while (nn1<nri);
232     
233     *outeriter       = nouter;         
234     *inneriter       = ninner;         
235 }
236
237
238
239
240
241 /*
242  * Gromacs nonbonded kernel nb_kernel230_adress_ex
243  * Coulomb interaction:     Reaction field
244  * VdW interaction:         Tabulated
245  * water optimization:      No
246  * Calculate forces:        yes
247  */
248 void nb_kernel230_adress_ex(
249                     int *           p_nri,
250                     int *           iinr,
251                     int *           jindex,
252                     int *           jjnr,
253                     int *           shift,
254                     real *         shiftvec,
255                     real *         fshift,
256                     int *           gid,
257                     real *         pos,
258                     real *         faction,
259                     real *         charge,
260                     real *         p_facel,
261                     real *         p_krf,
262                     real *         p_crf,
263                     real *         Vc,
264                     int *           type,
265                     int *           p_ntype,
266                     real *         vdwparam,
267                     real *         Vvdw,
268                     real *         p_tabscale,
269                     real *         VFtab,
270                     real *         invsqrta,
271                     real *         dvda,
272                     real *         p_gbtabscale,
273                     real *         GBtab,
274                     int *           p_nthreads,
275                     int *           count,
276                     void *          mtx,
277                     int *           outeriter,
278                     int *           inneriter,
279                     real           force_cap,
280                     real *         wf)
281 {
282     int           nri,ntype,nthreads;
283     real         facel,krf,crf,tabscale,gbtabscale;
284     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
285     int           nn0,nn1,nouter,ninner;
286     real         shX,shY,shZ;
287     real         fscal,tx,ty,tz;
288     real         rinvsq;
289     real         iq;
290     real         qq,vcoul,vctot;
291     int           nti;
292     int           tj;
293     real         Vvdw6,Vvdwtot;
294     real         Vvdw12;
295     real         r,rt,eps,eps2;
296     int           n0,nnn;
297     real         Y,F,Geps,Heps2,Fp,VV;
298     real         FF;
299     real         fijD,fijR;
300     real         krsq;
301     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
302     real         jx1,jy1,jz1;
303     real         dx11,dy11,dz11,rsq11,rinv11;
304     real         c6,c12;
305     real         weight_cg1, weight_cg2, weight_product;
306     real         hybscal;
307
308     nri              = *p_nri;         
309     ntype            = *p_ntype;       
310     nthreads         = *p_nthreads;    
311     facel            = *p_facel;       
312     krf              = *p_krf;         
313     crf              = *p_crf;         
314     tabscale         = *p_tabscale;    
315     nouter           = 0;              
316     ninner           = 0;
317
318     do
319     {
320         #ifdef GMX_THREAD_SHM_FDECOMP
321         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
322         nn0              = *count;         
323         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
324         *count           = nn1;            
325         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
326         if(nn1>nri) nn1=nri;
327         #else
328         nn0 = 0;
329         nn1 = nri;
330         #endif
331         
332         for(n=nn0; (n<nn1); n++)
333         {
334             is3              = 3*shift[n];     
335             shX              = shiftvec[is3];  
336             shY              = shiftvec[is3+1];
337             shZ              = shiftvec[is3+2];
338             nj0              = jindex[n];      
339             nj1              = jindex[n+1];    
340             ii               = iinr[n];        
341             ii3              = 3*ii;           
342             ix1              = shX + pos[ii3+0];
343             iy1              = shY + pos[ii3+1];
344             iz1              = shZ + pos[ii3+2];
345             iq               = facel*charge[ii];
346             nti              = 2*ntype*type[ii];
347             weight_cg1       = wf[ii];         
348             vctot            = 0;              
349             Vvdwtot          = 0;              
350             fix1             = 0;              
351             fiy1             = 0;              
352             fiz1             = 0;              
353             
354             for(k=nj0; (k<nj1); k++)
355             {
356                 jnr              = jjnr[k];        
357                 weight_cg2       = wf[jnr];        
358                 weight_product   = weight_cg1*weight_cg2;
359                 if (weight_product < ALMOST_ZERO) {
360                 /* force is zero, skip this molecule */
361                  continue;
362                 }
363                 else if (weight_product >= ALMOST_ONE)
364                 {
365                        hybscal = 1.0;
366                 }
367                 else
368                 {
369                    hybscal = weight_product;
370                 }
371                 j3               = 3*jnr;          
372                 jx1              = pos[j3+0];      
373                 jy1              = pos[j3+1];      
374                 jz1              = pos[j3+2];      
375                 dx11             = ix1 - jx1;      
376                 dy11             = iy1 - jy1;      
377                 dz11             = iz1 - jz1;      
378                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
379                 rinv11           = 1.0/sqrt(rsq11);
380                 qq               = iq*charge[jnr]; 
381                 tj               = nti+2*type[jnr];
382                 c6               = vdwparam[tj];   
383                 c12              = vdwparam[tj+1]; 
384                 rinvsq           = rinv11*rinv11;  
385                 krsq             = krf*rsq11;      
386                 vcoul            = qq*(rinv11+krsq-crf);
387                 vctot            = vctot+vcoul;    
388                 r                = rsq11*rinv11;   
389                 rt               = r*tabscale;     
390                 n0               = rt;             
391                 eps              = rt-n0;          
392                 eps2             = eps*eps;        
393                 nnn              = 8*n0;           
394                 Y                = VFtab[nnn];     
395                 F                = VFtab[nnn+1];   
396                 Geps             = eps*VFtab[nnn+2];
397                 Heps2            = eps2*VFtab[nnn+3];
398                 Fp               = F+Geps+Heps2;   
399                 VV               = Y+eps*Fp;       
400                 FF               = Fp+Geps+2.0*Heps2;
401                 Vvdw6            = c6*VV;          
402                 fijD             = c6*FF;          
403                 nnn              = nnn+4;          
404                 Y                = VFtab[nnn];     
405                 F                = VFtab[nnn+1];   
406                 Geps             = eps*VFtab[nnn+2];
407                 Heps2            = eps2*VFtab[nnn+3];
408                 Fp               = F+Geps+Heps2;   
409                 VV               = Y+eps*Fp;       
410                 FF               = Fp+Geps+2.0*Heps2;
411                 Vvdw12           = c12*VV;         
412                 fijR             = c12*FF;         
413                 Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
414                 fscal            = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
415                 fscal *= hybscal;
416                 if(force_cap>0 && (fabs(fscal)> force_cap)){
417                 fscal=force_cap*fscal/fabs(fscal);
418                 }
419                 tx               = fscal*dx11;     
420                 ty               = fscal*dy11;     
421                 tz               = fscal*dz11;     
422                 fix1             = fix1 + tx;      
423                 fiy1             = fiy1 + ty;      
424                 fiz1             = fiz1 + tz;      
425                 faction[j3+0]    = faction[j3+0] - tx;
426                 faction[j3+1]    = faction[j3+1] - ty;
427                 faction[j3+2]    = faction[j3+2] - tz;
428             }
429             
430             faction[ii3+0]   = faction[ii3+0] + fix1;
431             faction[ii3+1]   = faction[ii3+1] + fiy1;
432             faction[ii3+2]   = faction[ii3+2] + fiz1;
433             fshift[is3]      = fshift[is3]+fix1;
434             fshift[is3+1]    = fshift[is3+1]+fiy1;
435             fshift[is3+2]    = fshift[is3+2]+fiz1;
436             ggid             = gid[n];         
437             Vc[ggid]         = Vc[ggid] + vctot;
438             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
439             ninner           = ninner + nj1 - nj0;
440         }
441         
442         nouter           = nouter + nn1 - nn0;
443     }
444     while (nn1<nri);
445     
446     *outeriter       = nouter;         
447     *inneriter       = ninner;         
448 }
449
450