Merge remote-tracking branch 'origin/release-4-6' into HEAD
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_adress_c / nb_kernel220_c_adress.c
1 /*
2  * Copyright (c) Erik Lindahl, David van der Spoel 2003
3  * 
4  * This file is generated automatically at compile time
5  * by the program mknb in the Gromacs distribution.
6  *
7  * Options used when generation this file:
8  * Language:         c
9  * Precision:        single
10  * Threads:          yes
11  * Software invsqrt: no
12  * PowerPC invsqrt:  no
13  * Prefetch forces:  no
14  * Adress kernel:  yes
15  * Comments:         no
16  */
17 #ifdef HAVE_CONFIG_H
18 #include<config.h>
19 #endif
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
22 #endif
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
25 #include<math.h>
26
27 #include "nb_kernel220_adress.h"
28
29
30
31 /*
32  * Gromacs nonbonded kernel nb_kernel220_adress_cg
33  * Coulomb interaction:     Reaction field
34  * VdW interaction:         Buckingham
35  * water optimization:      No
36  * Calculate forces:        yes
37  */
38 void nb_kernel220_adress_cg(
39                     int *           p_nri,
40                     int *           iinr,
41                     int *           jindex,
42                     int *           jjnr,
43                     int *           shift,
44                     real *         shiftvec,
45                     real *         fshift,
46                     int *           gid,
47                     real *         pos,
48                     real *         faction,
49                     real *         charge,
50                     real *         p_facel,
51                     real *         p_krf,
52                     real *         p_crf,
53                     real *         Vc,
54                     int *           type,
55                     int *           p_ntype,
56                     real *         vdwparam,
57                     real *         Vvdw,
58                     real *         p_tabscale,
59                     real *         VFtab,
60                     real *         invsqrta,
61                     real *         dvda,
62                     real *         p_gbtabscale,
63                     real *         GBtab,
64                     int *           p_nthreads,
65                     int *           count,
66                     void *          mtx,
67                     int *           outeriter,
68                     int *           inneriter,
69                     real           force_cap,
70                     real *         wf)
71 {
72     int           nri,ntype,nthreads;
73     real         facel,krf,crf,tabscale,gbtabscale;
74     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75     int           nn0,nn1,nouter,ninner;
76     real         shX,shY,shZ;
77     real         fscal,tx,ty,tz;
78     real         rinvsq;
79     real         iq;
80     real         qq,vcoul,vctot;
81     int           nti;
82     int           tj;
83     real         rinvsix;
84     real         Vvdw6,Vvdwtot;
85     real         krsq;
86     real         Vvdwexp,br;
87     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
88     real         jx1,jy1,jz1;
89     real         dx11,dy11,dz11,rsq11,rinv11;
90     real         c6,cexp1,cexp2;
91     real         weight_cg1, weight_cg2, weight_product;
92     real         hybscal;
93
94     nri              = *p_nri;         
95     ntype            = *p_ntype;       
96     nthreads         = *p_nthreads;    
97     facel            = *p_facel;       
98     krf              = *p_krf;         
99     crf              = *p_crf;         
100     tabscale         = *p_tabscale;    
101     nouter           = 0;              
102     ninner           = 0;              
103     
104     do
105     {
106         #ifdef GMX_THREAD_SHM_FDECOMP
107         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
108         nn0              = *count;         
109         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
110         *count           = nn1;            
111         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
112         if(nn1>nri) nn1=nri;
113         #else
114         nn0 = 0;
115         nn1 = nri;
116         #endif
117         
118         for(n=nn0; (n<nn1); n++)
119         {
120             is3              = 3*shift[n];     
121             shX              = shiftvec[is3];  
122             shY              = shiftvec[is3+1];
123             shZ              = shiftvec[is3+2];
124             nj0              = jindex[n];      
125             nj1              = jindex[n+1];    
126             ii               = iinr[n];        
127             ii3              = 3*ii;           
128             ix1              = shX + pos[ii3+0];
129             iy1              = shY + pos[ii3+1];
130             iz1              = shZ + pos[ii3+2];
131             iq               = facel*charge[ii];
132             nti              = 3*ntype*type[ii];
133             weight_cg1       = wf[ii];         
134             vctot            = 0;              
135             Vvdwtot          = 0;              
136             fix1             = 0;              
137             fiy1             = 0;              
138             fiz1             = 0;              
139             
140             for(k=nj0; (k<nj1); k++)
141             {
142                 jnr              = jjnr[k];        
143                 weight_cg2       = wf[jnr];        
144                 weight_product   = weight_cg1*weight_cg2;
145                 if (weight_product < ALMOST_ZERO) {
146                        hybscal = 1.0;
147                 }
148                 else if (weight_product >= ALMOST_ONE)
149                 {
150                   /* force is zero, skip this molecule */
151                        continue;
152                 }
153                 else
154                 {
155                    hybscal = 1.0 - weight_product;
156                 }
157                 j3               = 3*jnr;          
158                 jx1              = pos[j3+0];      
159                 jy1              = pos[j3+1];      
160                 jz1              = pos[j3+2];      
161                 dx11             = ix1 - jx1;      
162                 dy11             = iy1 - jy1;      
163                 dz11             = iz1 - jz1;      
164                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
165                 rinv11           = 1.0/sqrt(rsq11);
166                 qq               = iq*charge[jnr]; 
167                 tj               = nti+3*type[jnr];
168                 c6               = vdwparam[tj];   
169                 cexp1            = vdwparam[tj+1]; 
170                 cexp2            = vdwparam[tj+2]; 
171                 rinvsq           = rinv11*rinv11;  
172                 krsq             = krf*rsq11;      
173                 vcoul            = qq*(rinv11+krsq-crf);
174                 vctot            = vctot+vcoul;    
175                 rinvsix          = rinvsq*rinvsq*rinvsq;
176                 Vvdw6            = c6*rinvsix;     
177                 br               = cexp2*rsq11*rinv11;
178                 Vvdwexp          = cexp1*exp(-br); 
179                 Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
180                 fscal            = (qq*(rinv11-2.0*krsq)+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
181                 fscal *= hybscal;
182                 tx               = fscal*dx11;     
183                 ty               = fscal*dy11;     
184                 tz               = fscal*dz11;     
185                 fix1             = fix1 + tx;      
186                 fiy1             = fiy1 + ty;      
187                 fiz1             = fiz1 + tz;      
188                 faction[j3+0]    = faction[j3+0] - tx;
189                 faction[j3+1]    = faction[j3+1] - ty;
190                 faction[j3+2]    = faction[j3+2] - tz;
191             }
192             
193             faction[ii3+0]   = faction[ii3+0] + fix1;
194             faction[ii3+1]   = faction[ii3+1] + fiy1;
195             faction[ii3+2]   = faction[ii3+2] + fiz1;
196             fshift[is3]      = fshift[is3]+fix1;
197             fshift[is3+1]    = fshift[is3+1]+fiy1;
198             fshift[is3+2]    = fshift[is3+2]+fiz1;
199             ggid             = gid[n];         
200             Vc[ggid]         = Vc[ggid] + vctot;
201             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
202             ninner           = ninner + nj1 - nj0;
203         }
204         
205         nouter           = nouter + nn1 - nn0;
206     }
207     while (nn1<nri);
208     
209     *outeriter       = nouter;         
210     *inneriter       = ninner;         
211 }
212
213
214
215
216
217 /*
218  * Gromacs nonbonded kernel nb_kernel220_adress_ex
219  * Coulomb interaction:     Reaction field
220  * VdW interaction:         Buckingham
221  * water optimization:      No
222  * Calculate forces:        yes
223  */
224 void nb_kernel220_adress_ex(
225                     int *           p_nri,
226                     int *           iinr,
227                     int *           jindex,
228                     int *           jjnr,
229                     int *           shift,
230                     real *         shiftvec,
231                     real *         fshift,
232                     int *           gid,
233                     real *         pos,
234                     real *         faction,
235                     real *         charge,
236                     real *         p_facel,
237                     real *         p_krf,
238                     real *         p_crf,
239                     real *         Vc,
240                     int *           type,
241                     int *           p_ntype,
242                     real *         vdwparam,
243                     real *         Vvdw,
244                     real *         p_tabscale,
245                     real *         VFtab,
246                     real *         invsqrta,
247                     real *         dvda,
248                     real *         p_gbtabscale,
249                     real *         GBtab,
250                     int *           p_nthreads,
251                     int *           count,
252                     void *          mtx,
253                     int *           outeriter,
254                     int *           inneriter,
255                     real           force_cap,
256                     real *         wf)
257 {
258     int           nri,ntype,nthreads;
259     real         facel,krf,crf,tabscale,gbtabscale;
260     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
261     int           nn0,nn1,nouter,ninner;
262     real         shX,shY,shZ;
263     real         fscal,tx,ty,tz;
264     real         rinvsq;
265     real         iq;
266     real         qq,vcoul,vctot;
267     int           nti;
268     int           tj;
269     real         rinvsix;
270     real         Vvdw6,Vvdwtot;
271     real         krsq;
272     real         Vvdwexp,br;
273     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
274     real         jx1,jy1,jz1;
275     real         dx11,dy11,dz11,rsq11,rinv11;
276     real         c6,cexp1,cexp2;
277     real         weight_cg1, weight_cg2, weight_product;
278     real         hybscal;
279
280     nri              = *p_nri;         
281     ntype            = *p_ntype;       
282     nthreads         = *p_nthreads;    
283     facel            = *p_facel;       
284     krf              = *p_krf;         
285     crf              = *p_crf;         
286     tabscale         = *p_tabscale;    
287     nouter           = 0;              
288     ninner           = 0;              
289     
290     do
291     {
292         #ifdef GMX_THREAD_SHM_FDECOMP
293         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
294         nn0              = *count;         
295         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
296         *count           = nn1;            
297         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
298         if(nn1>nri) nn1=nri;
299         #else
300         nn0 = 0;
301         nn1 = nri;
302         #endif
303         
304         for(n=nn0; (n<nn1); n++)
305         {
306             is3              = 3*shift[n];     
307             shX              = shiftvec[is3];  
308             shY              = shiftvec[is3+1];
309             shZ              = shiftvec[is3+2];
310             nj0              = jindex[n];      
311             nj1              = jindex[n+1];    
312             ii               = iinr[n];        
313             ii3              = 3*ii;           
314             ix1              = shX + pos[ii3+0];
315             iy1              = shY + pos[ii3+1];
316             iz1              = shZ + pos[ii3+2];
317             iq               = facel*charge[ii];
318             nti              = 3*ntype*type[ii];
319             weight_cg1       = wf[ii];         
320             vctot            = 0;              
321             Vvdwtot          = 0;              
322             fix1             = 0;              
323             fiy1             = 0;              
324             fiz1             = 0;              
325             
326             for(k=nj0; (k<nj1); k++)
327             {
328                 jnr              = jjnr[k];        
329                 weight_cg2       = wf[jnr];        
330                 weight_product   = weight_cg1*weight_cg2;
331                 if (weight_product < ALMOST_ZERO) {
332                 /* force is zero, skip this molecule */
333                  continue;
334                 }
335                 else if (weight_product >= ALMOST_ONE)
336                 {
337                        hybscal = 1.0;
338                 }
339                 else
340                 {
341                    hybscal = weight_product;
342                 }
343                 j3               = 3*jnr;          
344                 jx1              = pos[j3+0];      
345                 jy1              = pos[j3+1];      
346                 jz1              = pos[j3+2];      
347                 dx11             = ix1 - jx1;      
348                 dy11             = iy1 - jy1;      
349                 dz11             = iz1 - jz1;      
350                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
351                 rinv11           = 1.0/sqrt(rsq11);
352                 qq               = iq*charge[jnr]; 
353                 tj               = nti+3*type[jnr];
354                 c6               = vdwparam[tj];   
355                 cexp1            = vdwparam[tj+1]; 
356                 cexp2            = vdwparam[tj+2]; 
357                 rinvsq           = rinv11*rinv11;  
358                 krsq             = krf*rsq11;      
359                 vcoul            = qq*(rinv11+krsq-crf);
360                 vctot            = vctot+vcoul;    
361                 rinvsix          = rinvsq*rinvsq*rinvsq;
362                 Vvdw6            = c6*rinvsix;     
363                 br               = cexp2*rsq11*rinv11;
364                 Vvdwexp          = cexp1*exp(-br); 
365                 Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
366                 fscal            = (qq*(rinv11-2.0*krsq)+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
367                 fscal *= hybscal;
368                 if(force_cap>0 && (fabs(fscal)> force_cap)){
369                 fscal=force_cap*fscal/fabs(fscal);
370                 }
371                 tx               = fscal*dx11;     
372                 ty               = fscal*dy11;     
373                 tz               = fscal*dz11;     
374                 fix1             = fix1 + tx;      
375                 fiy1             = fiy1 + ty;      
376                 fiz1             = fiz1 + tz;      
377                 faction[j3+0]    = faction[j3+0] - tx;
378                 faction[j3+1]    = faction[j3+1] - ty;
379                 faction[j3+2]    = faction[j3+2] - tz;
380             }
381             
382             faction[ii3+0]   = faction[ii3+0] + fix1;
383             faction[ii3+1]   = faction[ii3+1] + fiy1;
384             faction[ii3+2]   = faction[ii3+2] + fiz1;
385             fshift[is3]      = fshift[is3]+fix1;
386             fshift[is3+1]    = fshift[is3+1]+fiy1;
387             fshift[is3+2]    = fshift[is3+2]+fiz1;
388             ggid             = gid[n];         
389             Vc[ggid]         = Vc[ggid] + vctot;
390             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
391             ninner           = ninner + nj1 - nj0;
392         }
393         
394         nouter           = nouter + nn1 - nn0;
395     }
396     while (nn1<nri);
397     
398     *outeriter       = nouter;         
399     *inneriter       = ninner;         
400 }
401
402