Merge remote-tracking branch 'origin/release-4-6' into HEAD
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_adress_c / nb_kernel430_c_adress.c
1 /*
2  * Copyright (c) Erik Lindahl, David van der Spoel 2003
3  * 
4  * This file is generated automatically at compile time
5  * by the program mknb in the Gromacs distribution.
6  *
7  * Options used when generation this file:
8  * Language:         c
9  * Precision:        single
10  * Threads:          yes
11  * Software invsqrt: no
12  * PowerPC invsqrt:  no
13  * Prefetch forces:  no
14  * Adress kernel:  yes
15  * Comments:         no
16  */
17 #ifdef HAVE_CONFIG_H
18 #include<config.h>
19 #endif
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
22 #endif
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
25 #include<math.h>
26
27 #include "nb_kernel430_adress.h"
28
29
30
31 /*
32  * Gromacs nonbonded kernel nb_kernel430_adress_cg
33  * Coulomb interaction:     Generalized-Born
34  * VdW interaction:         Tabulated
35  * water optimization:      No
36  * Calculate forces:        yes
37  */
38 void nb_kernel430_adress_cg(
39                     int *           p_nri,
40                     int *           iinr,
41                     int *           jindex,
42                     int *           jjnr,
43                     int *           shift,
44                     real *         shiftvec,
45                     real *         fshift,
46                     int *           gid,
47                     real *         pos,
48                     real *         faction,
49                     real *         charge,
50                     real *         p_facel,
51                     real *         p_krf,
52                     real *         p_crf,
53                     real *         Vc,
54                     int *           type,
55                     int *           p_ntype,
56                     real *         vdwparam,
57                     real *         Vvdw,
58                     real *         p_tabscale,
59                     real *         VFtab,
60                     real *         invsqrta,
61                     real *         dvda,
62                     real *         p_gbtabscale,
63                     real *         GBtab,
64                     int *           p_nthreads,
65                     int *           count,
66                     void *          mtx,
67                     int *           outeriter,
68                     int *           inneriter,
69                     real           force_cap,
70                     real *         wf)
71 {
72     int           nri,ntype,nthreads;
73     real         facel,krf,crf,tabscale,gbtabscale;
74     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75     int           nn0,nn1,nouter,ninner;
76     real         shX,shY,shZ;
77     real         fscal,tx,ty,tz;
78     real         iq;
79     real         qq,vcoul,vctot;
80     int           nti;
81     int           tj;
82     real         Vvdw6,Vvdwtot;
83     real         Vvdw12;
84     real         r,rt,eps,eps2;
85     int           n0,nnn;
86     real         Y,F,Geps,Heps2,Fp,VV;
87     real         FF;
88     real         fijC;
89     real         fijD,fijR;
90     real         isai,isaj,isaprod,gbscale,vgb;
91     real         dvdasum,dvdatmp,dvdaj,fgb;
92     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
93     real         jx1,jy1,jz1;
94     real         dx11,dy11,dz11,rsq11,rinv11;
95     real         c6,c12;
96     real         weight_cg1, weight_cg2, weight_product;
97     real         hybscal;
98
99     nri              = *p_nri;         
100     ntype            = *p_ntype;       
101     nthreads         = *p_nthreads;    
102     facel            = *p_facel;       
103     krf              = *p_krf;         
104     crf              = *p_crf;         
105     tabscale         = *p_tabscale;    
106     gbtabscale       = *p_gbtabscale;  
107     nouter           = 0;              
108     ninner           = 0;              
109     
110     do
111     {
112         #ifdef GMX_THREAD_SHM_FDECOMP
113         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
114         nn0              = *count;         
115         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
116         *count           = nn1;            
117         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
118         if(nn1>nri) nn1=nri;
119         #else
120         nn0 = 0;
121         nn1 = nri;
122         #endif
123         
124         for(n=nn0; (n<nn1); n++)
125         {
126             is3              = 3*shift[n];     
127             shX              = shiftvec[is3];  
128             shY              = shiftvec[is3+1];
129             shZ              = shiftvec[is3+2];
130             nj0              = jindex[n];      
131             nj1              = jindex[n+1];    
132             ii               = iinr[n];        
133             ii3              = 3*ii;           
134             ix1              = shX + pos[ii3+0];
135             iy1              = shY + pos[ii3+1];
136             iz1              = shZ + pos[ii3+2];
137             iq               = facel*charge[ii];
138             isai             = invsqrta[ii];   
139             nti              = 2*ntype*type[ii];
140             weight_cg1       = wf[ii];         
141             vctot            = 0;              
142             Vvdwtot          = 0;              
143             dvdasum          = 0;              
144             fix1             = 0;              
145             fiy1             = 0;              
146             fiz1             = 0;              
147             
148             for(k=nj0; (k<nj1); k++)
149             {
150                 jnr              = jjnr[k];        
151                 weight_cg2       = wf[jnr];        
152                 weight_product   = weight_cg1*weight_cg2;
153                 if (weight_product < ALMOST_ZERO) {
154                        hybscal = 1.0;
155                 }
156                 else if (weight_product >= ALMOST_ONE)
157                 {
158                   /* force is zero, skip this molecule */
159                        continue;
160                 }
161                 else
162                 {
163                    hybscal = 1.0 - weight_product;
164                 }
165                 j3               = 3*jnr;          
166                 jx1              = pos[j3+0];      
167                 jy1              = pos[j3+1];      
168                 jz1              = pos[j3+2];      
169                 dx11             = ix1 - jx1;      
170                 dy11             = iy1 - jy1;      
171                 dz11             = iz1 - jz1;      
172                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
173                 rinv11           = 1.0/sqrt(rsq11);
174                 isaj             = invsqrta[jnr];  
175                 isaprod          = isai*isaj;      
176                 qq               = iq*charge[jnr]; 
177                 vcoul            = qq*rinv11;      
178                 fscal            = vcoul*rinv11;   
179                 qq               = isaprod*(-qq);  
180                 gbscale          = isaprod*gbtabscale;
181                 tj               = nti+2*type[jnr];
182                 c6               = vdwparam[tj];   
183                 c12              = vdwparam[tj+1]; 
184                 dvdaj            = dvda[jnr];      
185                 r                = rsq11*rinv11;   
186                 rt               = r*gbscale;      
187                 n0               = rt;             
188                 eps              = rt-n0;          
189                 eps2             = eps*eps;        
190                 nnn              = 4*n0;           
191                 Y                = GBtab[nnn];     
192                 F                = GBtab[nnn+1];   
193                 Geps             = eps*GBtab[nnn+2];
194                 Heps2            = eps2*GBtab[nnn+3];
195                 Fp               = F+Geps+Heps2;   
196                 VV               = Y+eps*Fp;       
197                 FF               = Fp+Geps+2.0*Heps2;
198                 vgb              = qq*VV;          
199                 fijC             = qq*FF*gbscale;  
200                 dvdatmp          = -0.5*(vgb+fijC*r);
201                 dvdasum          = dvdasum + dvdatmp;
202                 dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
203                 vctot            = vctot + vcoul;  
204                 r                = rsq11*rinv11;   
205                 rt               = r*tabscale;     
206                 n0               = rt;             
207                 eps              = rt-n0;          
208                 eps2             = eps*eps;        
209                 nnn              = 8*n0;           
210                 Y                = VFtab[nnn];     
211                 F                = VFtab[nnn+1];   
212                 Geps             = eps*VFtab[nnn+2];
213                 Heps2            = eps2*VFtab[nnn+3];
214                 Fp               = F+Geps+Heps2;   
215                 VV               = Y+eps*Fp;       
216                 FF               = Fp+Geps+2.0*Heps2;
217                 Vvdw6            = c6*VV;          
218                 fijD             = c6*FF;          
219                 nnn              = nnn+4;          
220                 Y                = VFtab[nnn];     
221                 F                = VFtab[nnn+1];   
222                 Geps             = eps*VFtab[nnn+2];
223                 Heps2            = eps2*VFtab[nnn+3];
224                 Fp               = F+Geps+Heps2;   
225                 VV               = Y+eps*Fp;       
226                 FF               = Fp+Geps+2.0*Heps2;
227                 Vvdw12           = c12*VV;         
228                 fijR             = c12*FF;         
229                 Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
230                 fscal            = -((fijD+fijR)*tabscale+fijC-fscal)*rinv11;
231                 fscal *= hybscal;
232                 tx               = fscal*dx11;     
233                 ty               = fscal*dy11;     
234                 tz               = fscal*dz11;     
235                 fix1             = fix1 + tx;      
236                 fiy1             = fiy1 + ty;      
237                 fiz1             = fiz1 + tz;      
238                 faction[j3+0]    = faction[j3+0] - tx;
239                 faction[j3+1]    = faction[j3+1] - ty;
240                 faction[j3+2]    = faction[j3+2] - tz;
241             }
242             
243             faction[ii3+0]   = faction[ii3+0] + fix1;
244             faction[ii3+1]   = faction[ii3+1] + fiy1;
245             faction[ii3+2]   = faction[ii3+2] + fiz1;
246             fshift[is3]      = fshift[is3]+fix1;
247             fshift[is3+1]    = fshift[is3+1]+fiy1;
248             fshift[is3+2]    = fshift[is3+2]+fiz1;
249             ggid             = gid[n];         
250             Vc[ggid]         = Vc[ggid] + vctot;
251             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
252             dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
253             ninner           = ninner + nj1 - nj0;
254         }
255         
256         nouter           = nouter + nn1 - nn0;
257     }
258     while (nn1<nri);
259     
260     *outeriter       = nouter;         
261     *inneriter       = ninner;         
262 }
263
264
265
266
267
268 /*
269  * Gromacs nonbonded kernel nb_kernel430_adress_ex
270  * Coulomb interaction:     Generalized-Born
271  * VdW interaction:         Tabulated
272  * water optimization:      No
273  * Calculate forces:        yes
274  */
275 void nb_kernel430_adress_ex(
276                     int *           p_nri,
277                     int *           iinr,
278                     int *           jindex,
279                     int *           jjnr,
280                     int *           shift,
281                     real *         shiftvec,
282                     real *         fshift,
283                     int *           gid,
284                     real *         pos,
285                     real *         faction,
286                     real *         charge,
287                     real *         p_facel,
288                     real *         p_krf,
289                     real *         p_crf,
290                     real *         Vc,
291                     int *           type,
292                     int *           p_ntype,
293                     real *         vdwparam,
294                     real *         Vvdw,
295                     real *         p_tabscale,
296                     real *         VFtab,
297                     real *         invsqrta,
298                     real *         dvda,
299                     real *         p_gbtabscale,
300                     real *         GBtab,
301                     int *           p_nthreads,
302                     int *           count,
303                     void *          mtx,
304                     int *           outeriter,
305                     int *           inneriter,
306                     real           force_cap,
307                     real *         wf)
308 {
309     int           nri,ntype,nthreads;
310     real         facel,krf,crf,tabscale,gbtabscale;
311     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
312     int           nn0,nn1,nouter,ninner;
313     real         shX,shY,shZ;
314     real         fscal,tx,ty,tz;
315     real         iq;
316     real         qq,vcoul,vctot;
317     int           nti;
318     int           tj;
319     real         Vvdw6,Vvdwtot;
320     real         Vvdw12;
321     real         r,rt,eps,eps2;
322     int           n0,nnn;
323     real         Y,F,Geps,Heps2,Fp,VV;
324     real         FF;
325     real         fijC;
326     real         fijD,fijR;
327     real         isai,isaj,isaprod,gbscale,vgb;
328     real         dvdasum,dvdatmp,dvdaj,fgb;
329     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
330     real         jx1,jy1,jz1;
331     real         dx11,dy11,dz11,rsq11,rinv11;
332     real         c6,c12;
333     real         weight_cg1, weight_cg2, weight_product;
334     real         hybscal;
335
336     nri              = *p_nri;         
337     ntype            = *p_ntype;       
338     nthreads         = *p_nthreads;    
339     facel            = *p_facel;       
340     krf              = *p_krf;         
341     crf              = *p_crf;         
342     tabscale         = *p_tabscale;    
343     gbtabscale       = *p_gbtabscale;  
344     nouter           = 0;              
345     ninner           = 0;              
346     
347     do
348     {
349         #ifdef GMX_THREAD_SHM_FDECOMP
350         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
351         nn0              = *count;         
352         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
353         *count           = nn1;            
354         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
355         if(nn1>nri) nn1=nri;
356         #else
357         nn0 = 0;
358         nn1 = nri;
359         #endif
360         
361         for(n=nn0; (n<nn1); n++)
362         {
363             is3              = 3*shift[n];     
364             shX              = shiftvec[is3];  
365             shY              = shiftvec[is3+1];
366             shZ              = shiftvec[is3+2];
367             nj0              = jindex[n];      
368             nj1              = jindex[n+1];    
369             ii               = iinr[n];        
370             ii3              = 3*ii;           
371             ix1              = shX + pos[ii3+0];
372             iy1              = shY + pos[ii3+1];
373             iz1              = shZ + pos[ii3+2];
374             iq               = facel*charge[ii];
375             isai             = invsqrta[ii];   
376             nti              = 2*ntype*type[ii];
377             weight_cg1       = wf[ii];         
378             vctot            = 0;              
379             Vvdwtot          = 0;              
380             dvdasum          = 0;              
381             fix1             = 0;              
382             fiy1             = 0;              
383             fiz1             = 0;              
384             
385             for(k=nj0; (k<nj1); k++)
386             {
387                 jnr              = jjnr[k];        
388                 weight_cg2       = wf[jnr];        
389                 weight_product   = weight_cg1*weight_cg2;
390                 if (weight_product < ALMOST_ZERO) {
391                 /* force is zero, skip this molecule */
392                  continue;
393                 }
394                 else if (weight_product >= ALMOST_ONE)
395                 {
396                        hybscal = 1.0;
397                 }
398                 else
399                 {
400                    hybscal = weight_product;
401                 }
402                 j3               = 3*jnr;          
403                 jx1              = pos[j3+0];      
404                 jy1              = pos[j3+1];      
405                 jz1              = pos[j3+2];      
406                 dx11             = ix1 - jx1;      
407                 dy11             = iy1 - jy1;      
408                 dz11             = iz1 - jz1;      
409                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
410                 rinv11           = 1.0/sqrt(rsq11);
411                 isaj             = invsqrta[jnr];  
412                 isaprod          = isai*isaj;      
413                 qq               = iq*charge[jnr]; 
414                 vcoul            = qq*rinv11;      
415                 fscal            = vcoul*rinv11;   
416                 qq               = isaprod*(-qq);  
417                 gbscale          = isaprod*gbtabscale;
418                 tj               = nti+2*type[jnr];
419                 c6               = vdwparam[tj];   
420                 c12              = vdwparam[tj+1]; 
421                 dvdaj            = dvda[jnr];      
422                 r                = rsq11*rinv11;   
423                 rt               = r*gbscale;      
424                 n0               = rt;             
425                 eps              = rt-n0;          
426                 eps2             = eps*eps;        
427                 nnn              = 4*n0;           
428                 Y                = GBtab[nnn];     
429                 F                = GBtab[nnn+1];   
430                 Geps             = eps*GBtab[nnn+2];
431                 Heps2            = eps2*GBtab[nnn+3];
432                 Fp               = F+Geps+Heps2;   
433                 VV               = Y+eps*Fp;       
434                 FF               = Fp+Geps+2.0*Heps2;
435                 vgb              = qq*VV;          
436                 fijC             = qq*FF*gbscale;  
437                 dvdatmp          = -0.5*(vgb+fijC*r);
438                 dvdasum          = dvdasum + dvdatmp;
439                 dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
440                 vctot            = vctot + vcoul;  
441                 r                = rsq11*rinv11;   
442                 rt               = r*tabscale;     
443                 n0               = rt;             
444                 eps              = rt-n0;          
445                 eps2             = eps*eps;        
446                 nnn              = 8*n0;           
447                 Y                = VFtab[nnn];     
448                 F                = VFtab[nnn+1];   
449                 Geps             = eps*VFtab[nnn+2];
450                 Heps2            = eps2*VFtab[nnn+3];
451                 Fp               = F+Geps+Heps2;   
452                 VV               = Y+eps*Fp;       
453                 FF               = Fp+Geps+2.0*Heps2;
454                 Vvdw6            = c6*VV;          
455                 fijD             = c6*FF;          
456                 nnn              = nnn+4;          
457                 Y                = VFtab[nnn];     
458                 F                = VFtab[nnn+1];   
459                 Geps             = eps*VFtab[nnn+2];
460                 Heps2            = eps2*VFtab[nnn+3];
461                 Fp               = F+Geps+Heps2;   
462                 VV               = Y+eps*Fp;       
463                 FF               = Fp+Geps+2.0*Heps2;
464                 Vvdw12           = c12*VV;         
465                 fijR             = c12*FF;         
466                 Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
467                 fscal            = -((fijD+fijR)*tabscale+fijC-fscal)*rinv11;
468                 fscal *= hybscal;
469                 if(force_cap>0 && (fabs(fscal)> force_cap)){
470                 fscal=force_cap*fscal/fabs(fscal);
471                 }
472                 tx               = fscal*dx11;     
473                 ty               = fscal*dy11;     
474                 tz               = fscal*dz11;     
475                 fix1             = fix1 + tx;      
476                 fiy1             = fiy1 + ty;      
477                 fiz1             = fiz1 + tz;      
478                 faction[j3+0]    = faction[j3+0] - tx;
479                 faction[j3+1]    = faction[j3+1] - ty;
480                 faction[j3+2]    = faction[j3+2] - tz;
481             }
482             
483             faction[ii3+0]   = faction[ii3+0] + fix1;
484             faction[ii3+1]   = faction[ii3+1] + fiy1;
485             faction[ii3+2]   = faction[ii3+2] + fiz1;
486             fshift[is3]      = fshift[is3]+fix1;
487             fshift[is3+1]    = fshift[is3+1]+fiy1;
488             fshift[is3+2]    = fshift[is3+2]+fiz1;
489             ggid             = gid[n];         
490             Vc[ggid]         = Vc[ggid] + vctot;
491             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
492             dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
493             ninner           = ninner + nj1 - nj0;
494         }
495         
496         nouter           = nouter + nn1 - nn0;
497     }
498     while (nn1<nri);
499     
500     *outeriter       = nouter;         
501     *inneriter       = ninner;         
502 }
503
504