Merge remote-tracking branch 'origin/release-4-6' into HEAD
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_adress_c / nb_kernel420_c_adress.c
1 /*
2  * Copyright (c) Erik Lindahl, David van der Spoel 2003
3  * 
4  * This file is generated automatically at compile time
5  * by the program mknb in the Gromacs distribution.
6  *
7  * Options used when generation this file:
8  * Language:         c
9  * Precision:        single
10  * Threads:          yes
11  * Software invsqrt: no
12  * PowerPC invsqrt:  no
13  * Prefetch forces:  no
14  * Adress kernel:  yes
15  * Comments:         no
16  */
17 #ifdef HAVE_CONFIG_H
18 #include<config.h>
19 #endif
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
22 #endif
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
25 #include<math.h>
26
27 #include "nb_kernel420_adress.h"
28
29
30
31 /*
32  * Gromacs nonbonded kernel nb_kernel420_adress_cg
33  * Coulomb interaction:     Generalized-Born
34  * VdW interaction:         Buckingham
35  * water optimization:      No
36  * Calculate forces:        yes
37  */
38 void nb_kernel420_adress_cg(
39                     int *           p_nri,
40                     int *           iinr,
41                     int *           jindex,
42                     int *           jjnr,
43                     int *           shift,
44                     real *         shiftvec,
45                     real *         fshift,
46                     int *           gid,
47                     real *         pos,
48                     real *         faction,
49                     real *         charge,
50                     real *         p_facel,
51                     real *         p_krf,
52                     real *         p_crf,
53                     real *         Vc,
54                     int *           type,
55                     int *           p_ntype,
56                     real *         vdwparam,
57                     real *         Vvdw,
58                     real *         p_tabscale,
59                     real *         VFtab,
60                     real *         invsqrta,
61                     real *         dvda,
62                     real *         p_gbtabscale,
63                     real *         GBtab,
64                     int *           p_nthreads,
65                     int *           count,
66                     void *          mtx,
67                     int *           outeriter,
68                     int *           inneriter,
69                     real           force_cap,
70                     real *         wf)
71 {
72     int           nri,ntype,nthreads;
73     real         facel,krf,crf,tabscale,gbtabscale;
74     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75     int           nn0,nn1,nouter,ninner;
76     real         shX,shY,shZ;
77     real         fscal,tx,ty,tz;
78     real         rinvsq;
79     real         iq;
80     real         qq,vcoul,vctot;
81     int           nti;
82     int           tj;
83     real         rinvsix;
84     real         Vvdw6,Vvdwtot;
85     real         r,rt,eps,eps2;
86     int           n0,nnn;
87     real         Y,F,Geps,Heps2,Fp,VV;
88     real         FF;
89     real         fijC;
90     real         isai,isaj,isaprod,gbscale,vgb;
91     real         dvdasum,dvdatmp,dvdaj,fgb;
92     real         Vvdwexp,br;
93     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
94     real         jx1,jy1,jz1;
95     real         dx11,dy11,dz11,rsq11,rinv11;
96     real         c6,cexp1,cexp2;
97     real         weight_cg1, weight_cg2, weight_product;
98     real         hybscal;
99
100     nri              = *p_nri;         
101     ntype            = *p_ntype;       
102     nthreads         = *p_nthreads;    
103     facel            = *p_facel;       
104     krf              = *p_krf;         
105     crf              = *p_crf;         
106     tabscale         = *p_tabscale;    
107     gbtabscale       = *p_gbtabscale;  
108     nouter           = 0;              
109     ninner           = 0;              
110     
111     do
112     {
113         #ifdef GMX_THREAD_SHM_FDECOMP
114         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
115         nn0              = *count;         
116         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
117         *count           = nn1;            
118         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
119         if(nn1>nri) nn1=nri;
120         #else
121         nn0 = 0;
122         nn1 = nri;
123         #endif
124         
125         for(n=nn0; (n<nn1); n++)
126         {
127             is3              = 3*shift[n];     
128             shX              = shiftvec[is3];  
129             shY              = shiftvec[is3+1];
130             shZ              = shiftvec[is3+2];
131             nj0              = jindex[n];      
132             nj1              = jindex[n+1];    
133             ii               = iinr[n];        
134             ii3              = 3*ii;           
135             ix1              = shX + pos[ii3+0];
136             iy1              = shY + pos[ii3+1];
137             iz1              = shZ + pos[ii3+2];
138             iq               = facel*charge[ii];
139             isai             = invsqrta[ii];   
140             nti              = 3*ntype*type[ii];
141             weight_cg1       = wf[ii];         
142             vctot            = 0;              
143             Vvdwtot          = 0;              
144             dvdasum          = 0;              
145             fix1             = 0;              
146             fiy1             = 0;              
147             fiz1             = 0;              
148             
149             for(k=nj0; (k<nj1); k++)
150             {
151                 jnr              = jjnr[k];        
152                 weight_cg2       = wf[jnr];        
153                 weight_product   = weight_cg1*weight_cg2;
154                 if (weight_product < ALMOST_ZERO) {
155                        hybscal = 1.0;
156                 }
157                 else if (weight_product >= ALMOST_ONE)
158                 {
159                   /* force is zero, skip this molecule */
160                        continue;
161                 }
162                 else
163                 {
164                    hybscal = 1.0 - weight_product;
165                 }
166                 j3               = 3*jnr;          
167                 jx1              = pos[j3+0];      
168                 jy1              = pos[j3+1];      
169                 jz1              = pos[j3+2];      
170                 dx11             = ix1 - jx1;      
171                 dy11             = iy1 - jy1;      
172                 dz11             = iz1 - jz1;      
173                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
174                 rinv11           = 1.0/sqrt(rsq11);
175                 isaj             = invsqrta[jnr];  
176                 isaprod          = isai*isaj;      
177                 qq               = iq*charge[jnr]; 
178                 vcoul            = qq*rinv11;      
179                 fscal            = vcoul*rinv11;   
180                 qq               = isaprod*(-qq);  
181                 gbscale          = isaprod*gbtabscale;
182                 tj               = nti+3*type[jnr];
183                 c6               = vdwparam[tj];   
184                 cexp1            = vdwparam[tj+1]; 
185                 cexp2            = vdwparam[tj+2]; 
186                 rinvsq           = rinv11*rinv11;  
187                 dvdaj            = dvda[jnr];      
188                 r                = rsq11*rinv11;   
189                 rt               = r*gbscale;      
190                 n0               = rt;             
191                 eps              = rt-n0;          
192                 eps2             = eps*eps;        
193                 nnn              = 4*n0;           
194                 Y                = GBtab[nnn];     
195                 F                = GBtab[nnn+1];   
196                 Geps             = eps*GBtab[nnn+2];
197                 Heps2            = eps2*GBtab[nnn+3];
198                 Fp               = F+Geps+Heps2;   
199                 VV               = Y+eps*Fp;       
200                 FF               = Fp+Geps+2.0*Heps2;
201                 vgb              = qq*VV;          
202                 fijC             = qq*FF*gbscale;  
203                 dvdatmp          = -0.5*(vgb+fijC*r);
204                 dvdasum          = dvdasum + dvdatmp;
205                 dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
206                 vctot            = vctot + vcoul;  
207                 rinvsix          = rinvsq*rinvsq*rinvsq;
208                 Vvdw6            = c6*rinvsix;     
209                 br               = cexp2*rsq11*rinv11;
210                 Vvdwexp          = cexp1*exp(-br); 
211                 Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
212                 fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv11;
213                 fscal *= hybscal;
214                 tx               = fscal*dx11;     
215                 ty               = fscal*dy11;     
216                 tz               = fscal*dz11;     
217                 fix1             = fix1 + tx;      
218                 fiy1             = fiy1 + ty;      
219                 fiz1             = fiz1 + tz;      
220                 faction[j3+0]    = faction[j3+0] - tx;
221                 faction[j3+1]    = faction[j3+1] - ty;
222                 faction[j3+2]    = faction[j3+2] - tz;
223             }
224             
225             faction[ii3+0]   = faction[ii3+0] + fix1;
226             faction[ii3+1]   = faction[ii3+1] + fiy1;
227             faction[ii3+2]   = faction[ii3+2] + fiz1;
228             fshift[is3]      = fshift[is3]+fix1;
229             fshift[is3+1]    = fshift[is3+1]+fiy1;
230             fshift[is3+2]    = fshift[is3+2]+fiz1;
231             ggid             = gid[n];         
232             Vc[ggid]         = Vc[ggid] + vctot;
233             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
234             dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
235             ninner           = ninner + nj1 - nj0;
236         }
237         
238         nouter           = nouter + nn1 - nn0;
239     }
240     while (nn1<nri);
241     
242     *outeriter       = nouter;         
243     *inneriter       = ninner;         
244 }
245
246
247
248
249
250 /*
251  * Gromacs nonbonded kernel nb_kernel420_adress_ex
252  * Coulomb interaction:     Generalized-Born
253  * VdW interaction:         Buckingham
254  * water optimization:      No
255  * Calculate forces:        yes
256  */
257 void nb_kernel420_adress_ex(
258                     int *           p_nri,
259                     int *           iinr,
260                     int *           jindex,
261                     int *           jjnr,
262                     int *           shift,
263                     real *         shiftvec,
264                     real *         fshift,
265                     int *           gid,
266                     real *         pos,
267                     real *         faction,
268                     real *         charge,
269                     real *         p_facel,
270                     real *         p_krf,
271                     real *         p_crf,
272                     real *         Vc,
273                     int *           type,
274                     int *           p_ntype,
275                     real *         vdwparam,
276                     real *         Vvdw,
277                     real *         p_tabscale,
278                     real *         VFtab,
279                     real *         invsqrta,
280                     real *         dvda,
281                     real *         p_gbtabscale,
282                     real *         GBtab,
283                     int *           p_nthreads,
284                     int *           count,
285                     void *          mtx,
286                     int *           outeriter,
287                     int *           inneriter,
288                     real           force_cap,
289                     real *         wf)
290 {
291     int           nri,ntype,nthreads;
292     real         facel,krf,crf,tabscale,gbtabscale;
293     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
294     int           nn0,nn1,nouter,ninner;
295     real         shX,shY,shZ;
296     real         fscal,tx,ty,tz;
297     real         rinvsq;
298     real         iq;
299     real         qq,vcoul,vctot;
300     int           nti;
301     int           tj;
302     real         rinvsix;
303     real         Vvdw6,Vvdwtot;
304     real         r,rt,eps,eps2;
305     int           n0,nnn;
306     real         Y,F,Geps,Heps2,Fp,VV;
307     real         FF;
308     real         fijC;
309     real         isai,isaj,isaprod,gbscale,vgb;
310     real         dvdasum,dvdatmp,dvdaj,fgb;
311     real         Vvdwexp,br;
312     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
313     real         jx1,jy1,jz1;
314     real         dx11,dy11,dz11,rsq11,rinv11;
315     real         c6,cexp1,cexp2;
316     real         weight_cg1, weight_cg2, weight_product;
317     real         hybscal;
318
319     nri              = *p_nri;         
320     ntype            = *p_ntype;       
321     nthreads         = *p_nthreads;    
322     facel            = *p_facel;       
323     krf              = *p_krf;         
324     crf              = *p_crf;         
325     tabscale         = *p_tabscale;    
326     gbtabscale       = *p_gbtabscale;  
327     nouter           = 0;              
328     ninner           = 0;              
329     
330     do
331     {
332         #ifdef GMX_THREAD_SHM_FDECOMP
333         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
334         nn0              = *count;         
335         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
336         *count           = nn1;            
337         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
338         if(nn1>nri) nn1=nri;
339         #else
340         nn0 = 0;
341         nn1 = nri;
342         #endif
343         
344         for(n=nn0; (n<nn1); n++)
345         {
346             is3              = 3*shift[n];     
347             shX              = shiftvec[is3];  
348             shY              = shiftvec[is3+1];
349             shZ              = shiftvec[is3+2];
350             nj0              = jindex[n];      
351             nj1              = jindex[n+1];    
352             ii               = iinr[n];        
353             ii3              = 3*ii;           
354             ix1              = shX + pos[ii3+0];
355             iy1              = shY + pos[ii3+1];
356             iz1              = shZ + pos[ii3+2];
357             iq               = facel*charge[ii];
358             isai             = invsqrta[ii];   
359             nti              = 3*ntype*type[ii];
360             weight_cg1       = wf[ii];         
361             vctot            = 0;              
362             Vvdwtot          = 0;              
363             dvdasum          = 0;              
364             fix1             = 0;              
365             fiy1             = 0;              
366             fiz1             = 0;              
367             
368             for(k=nj0; (k<nj1); k++)
369             {
370                 jnr              = jjnr[k];        
371                 weight_cg2       = wf[jnr];        
372                 weight_product   = weight_cg1*weight_cg2;
373                 if (weight_product < ALMOST_ZERO) {
374                 /* force is zero, skip this molecule */
375                  continue;
376                 }
377                 else if (weight_product >= ALMOST_ONE)
378                 {
379                        hybscal = 1.0;
380                 }
381                 else
382                 {
383                    hybscal = weight_product;
384                 }
385                 j3               = 3*jnr;          
386                 jx1              = pos[j3+0];      
387                 jy1              = pos[j3+1];      
388                 jz1              = pos[j3+2];      
389                 dx11             = ix1 - jx1;      
390                 dy11             = iy1 - jy1;      
391                 dz11             = iz1 - jz1;      
392                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
393                 rinv11           = 1.0/sqrt(rsq11);
394                 isaj             = invsqrta[jnr];  
395                 isaprod          = isai*isaj;      
396                 qq               = iq*charge[jnr]; 
397                 vcoul            = qq*rinv11;      
398                 fscal            = vcoul*rinv11;   
399                 qq               = isaprod*(-qq);  
400                 gbscale          = isaprod*gbtabscale;
401                 tj               = nti+3*type[jnr];
402                 c6               = vdwparam[tj];   
403                 cexp1            = vdwparam[tj+1]; 
404                 cexp2            = vdwparam[tj+2]; 
405                 rinvsq           = rinv11*rinv11;  
406                 dvdaj            = dvda[jnr];      
407                 r                = rsq11*rinv11;   
408                 rt               = r*gbscale;      
409                 n0               = rt;             
410                 eps              = rt-n0;          
411                 eps2             = eps*eps;        
412                 nnn              = 4*n0;           
413                 Y                = GBtab[nnn];     
414                 F                = GBtab[nnn+1];   
415                 Geps             = eps*GBtab[nnn+2];
416                 Heps2            = eps2*GBtab[nnn+3];
417                 Fp               = F+Geps+Heps2;   
418                 VV               = Y+eps*Fp;       
419                 FF               = Fp+Geps+2.0*Heps2;
420                 vgb              = qq*VV;          
421                 fijC             = qq*FF*gbscale;  
422                 dvdatmp          = -0.5*(vgb+fijC*r);
423                 dvdasum          = dvdasum + dvdatmp;
424                 dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
425                 vctot            = vctot + vcoul;  
426                 rinvsix          = rinvsq*rinvsq*rinvsq;
427                 Vvdw6            = c6*rinvsix;     
428                 br               = cexp2*rsq11*rinv11;
429                 Vvdwexp          = cexp1*exp(-br); 
430                 Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
431                 fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv11;
432                 fscal *= hybscal;
433                 if(force_cap>0 && (fabs(fscal)> force_cap)){
434                 fscal=force_cap*fscal/fabs(fscal);
435                 }
436                 tx               = fscal*dx11;     
437                 ty               = fscal*dy11;     
438                 tz               = fscal*dz11;     
439                 fix1             = fix1 + tx;      
440                 fiy1             = fiy1 + ty;      
441                 fiz1             = fiz1 + tz;      
442                 faction[j3+0]    = faction[j3+0] - tx;
443                 faction[j3+1]    = faction[j3+1] - ty;
444                 faction[j3+2]    = faction[j3+2] - tz;
445             }
446             
447             faction[ii3+0]   = faction[ii3+0] + fix1;
448             faction[ii3+1]   = faction[ii3+1] + fiy1;
449             faction[ii3+2]   = faction[ii3+2] + fiz1;
450             fshift[is3]      = fshift[is3]+fix1;
451             fshift[is3+1]    = fshift[is3+1]+fiy1;
452             fshift[is3+2]    = fshift[is3+2]+fiz1;
453             ggid             = gid[n];         
454             Vc[ggid]         = Vc[ggid] + vctot;
455             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
456             dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
457             ninner           = ninner + nj1 - nj0;
458         }
459         
460         nouter           = nouter + nn1 - nn0;
461     }
462     while (nn1<nri);
463     
464     *outeriter       = nouter;         
465     *inneriter       = ninner;         
466 }
467
468