Merge remote-tracking branch 'origin/release-4-6' into HEAD
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_adress_c / nb_kernel110_c_adress.c
1 /*
2  * Copyright (c) Erik Lindahl, David van der Spoel 2003
3  * 
4  * This file is generated automatically at compile time
5  * by the program mknb in the Gromacs distribution.
6  *
7  * Options used when generation this file:
8  * Language:         c
9  * Precision:        single
10  * Threads:          yes
11  * Software invsqrt: no
12  * PowerPC invsqrt:  no
13  * Prefetch forces:  no
14  * Adress kernel:  yes
15  * Comments:         no
16  */
17 #ifdef HAVE_CONFIG_H
18 #include<config.h>
19 #endif
20 #ifdef GMX_THREAD_SHM_FDECOMP
21 #include<thread_mpi.h>
22 #endif
23 #define ALMOST_ZERO 1e-30
24 #define ALMOST_ONE 1-(1e-30)
25 #include<math.h>
26
27 #include "nb_kernel110_adress.h"
28
29
30
31 /*
32  * Gromacs nonbonded kernel nb_kernel110_adress_cg
33  * Coulomb interaction:     Normal Coulomb
34  * VdW interaction:         Lennard-Jones
35  * water optimization:      No
36  * Calculate forces:        yes
37  */
38 void nb_kernel110_adress_cg(
39                     int *           p_nri,
40                     int *           iinr,
41                     int *           jindex,
42                     int *           jjnr,
43                     int *           shift,
44                     real *         shiftvec,
45                     real *         fshift,
46                     int *           gid,
47                     real *         pos,
48                     real *         faction,
49                     real *         charge,
50                     real *         p_facel,
51                     real *         p_krf,
52                     real *         p_crf,
53                     real *         Vc,
54                     int *           type,
55                     int *           p_ntype,
56                     real *         vdwparam,
57                     real *         Vvdw,
58                     real *         p_tabscale,
59                     real *         VFtab,
60                     real *         invsqrta,
61                     real *         dvda,
62                     real *         p_gbtabscale,
63                     real *         GBtab,
64                     int *           p_nthreads,
65                     int *           count,
66                     void *          mtx,
67                     int *           outeriter,
68                     int *           inneriter,
69                     real           force_cap,
70                     real *         wf)
71 {
72     int           nri,ntype,nthreads;
73     real         facel,krf,crf,tabscale,gbtabscale;
74     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
75     int           nn0,nn1,nouter,ninner;
76     real         shX,shY,shZ;
77     real         fscal,tx,ty,tz;
78     real         rinvsq;
79     real         iq;
80     real         qq,vcoul,vctot;
81     int           nti;
82     int           tj;
83     real         rinvsix;
84     real         Vvdw6,Vvdwtot;
85     real         Vvdw12;
86     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
87     real         jx1,jy1,jz1;
88     real         dx11,dy11,dz11,rsq11,rinv11;
89     real         c6,c12;
90     real         weight_cg1, weight_cg2, weight_product;
91     real         hybscal;
92
93     nri              = *p_nri;         
94     ntype            = *p_ntype;       
95     nthreads         = *p_nthreads;    
96     facel            = *p_facel;       
97     krf              = *p_krf;         
98     crf              = *p_crf;         
99     tabscale         = *p_tabscale;    
100     nouter           = 0;              
101     ninner           = 0;              
102     
103     do
104     {
105         #ifdef GMX_THREAD_SHM_FDECOMP
106         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
107         nn0              = *count;         
108         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
109         *count           = nn1;            
110         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
111         if(nn1>nri) nn1=nri;
112         #else
113         nn0 = 0;
114         nn1 = nri;
115         #endif
116         
117         for(n=nn0; (n<nn1); n++)
118         {
119             is3              = 3*shift[n];     
120             shX              = shiftvec[is3];  
121             shY              = shiftvec[is3+1];
122             shZ              = shiftvec[is3+2];
123             nj0              = jindex[n];      
124             nj1              = jindex[n+1];    
125             ii               = iinr[n];        
126             ii3              = 3*ii;           
127             ix1              = shX + pos[ii3+0];
128             iy1              = shY + pos[ii3+1];
129             iz1              = shZ + pos[ii3+2];
130             iq               = facel*charge[ii];
131             nti              = 2*ntype*type[ii];
132             weight_cg1       = wf[ii];         
133             vctot            = 0;              
134             Vvdwtot          = 0;              
135             fix1             = 0;              
136             fiy1             = 0;              
137             fiz1             = 0;              
138             
139             for(k=nj0; (k<nj1); k++)
140             {
141                 jnr              = jjnr[k];        
142                 weight_cg2       = wf[jnr];        
143                 weight_product   = weight_cg1*weight_cg2;
144                 if (weight_product < ALMOST_ZERO) {
145                        hybscal = 1.0;
146                 }
147                 else if (weight_product >= ALMOST_ONE)
148                 {
149                   /* force is zero, skip this molecule */
150                        continue;
151                 }
152                 else
153                 {
154                    hybscal = 1.0 - weight_product;
155                 }
156                 j3               = 3*jnr;          
157                 jx1              = pos[j3+0];      
158                 jy1              = pos[j3+1];      
159                 jz1              = pos[j3+2];      
160                 dx11             = ix1 - jx1;      
161                 dy11             = iy1 - jy1;      
162                 dz11             = iz1 - jz1;      
163                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
164                 rinv11           = 1.0/sqrt(rsq11);
165                 qq               = iq*charge[jnr]; 
166                 tj               = nti+2*type[jnr];
167                 c6               = vdwparam[tj];   
168                 c12              = vdwparam[tj+1]; 
169                 rinvsq           = rinv11*rinv11;  
170                 vcoul            = qq*rinv11;      
171                 vctot            = vctot+vcoul;    
172                 rinvsix          = rinvsq*rinvsq*rinvsq;
173                 Vvdw6            = c6*rinvsix;     
174                 Vvdw12           = c12*rinvsix*rinvsix;
175                 Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
176                 fscal            = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
177                 fscal *= hybscal;
178                 tx               = fscal*dx11;     
179                 ty               = fscal*dy11;     
180                 tz               = fscal*dz11;     
181                 fix1             = fix1 + tx;      
182                 fiy1             = fiy1 + ty;      
183                 fiz1             = fiz1 + tz;      
184                 faction[j3+0]    = faction[j3+0] - tx;
185                 faction[j3+1]    = faction[j3+1] - ty;
186                 faction[j3+2]    = faction[j3+2] - tz;
187             }
188             
189             faction[ii3+0]   = faction[ii3+0] + fix1;
190             faction[ii3+1]   = faction[ii3+1] + fiy1;
191             faction[ii3+2]   = faction[ii3+2] + fiz1;
192             fshift[is3]      = fshift[is3]+fix1;
193             fshift[is3+1]    = fshift[is3+1]+fiy1;
194             fshift[is3+2]    = fshift[is3+2]+fiz1;
195             ggid             = gid[n];         
196             Vc[ggid]         = Vc[ggid] + vctot;
197             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
198             ninner           = ninner + nj1 - nj0;
199         }
200         
201         nouter           = nouter + nn1 - nn0;
202     }
203     while (nn1<nri);
204     
205     *outeriter       = nouter;         
206     *inneriter       = ninner;         
207 }
208
209
210
211
212
213 /*
214  * Gromacs nonbonded kernel nb_kernel110_adress_ex
215  * Coulomb interaction:     Normal Coulomb
216  * VdW interaction:         Lennard-Jones
217  * water optimization:      No
218  * Calculate forces:        yes
219  */
220 void nb_kernel110_adress_ex(
221                     int *           p_nri,
222                     int *           iinr,
223                     int *           jindex,
224                     int *           jjnr,
225                     int *           shift,
226                     real *         shiftvec,
227                     real *         fshift,
228                     int *           gid,
229                     real *         pos,
230                     real *         faction,
231                     real *         charge,
232                     real *         p_facel,
233                     real *         p_krf,
234                     real *         p_crf,
235                     real *         Vc,
236                     int *           type,
237                     int *           p_ntype,
238                     real *         vdwparam,
239                     real *         Vvdw,
240                     real *         p_tabscale,
241                     real *         VFtab,
242                     real *         invsqrta,
243                     real *         dvda,
244                     real *         p_gbtabscale,
245                     real *         GBtab,
246                     int *           p_nthreads,
247                     int *           count,
248                     void *          mtx,
249                     int *           outeriter,
250                     int *           inneriter,
251                     real           force_cap,
252                     real *         wf)
253 {
254     int           nri,ntype,nthreads;
255     real         facel,krf,crf,tabscale,gbtabscale;
256     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
257     int           nn0,nn1,nouter,ninner;
258     real         shX,shY,shZ;
259     real         fscal,tx,ty,tz;
260     real         rinvsq;
261     real         iq;
262     real         qq,vcoul,vctot;
263     int           nti;
264     int           tj;
265     real         rinvsix;
266     real         Vvdw6,Vvdwtot;
267     real         Vvdw12;
268     real         ix1,iy1,iz1,fix1,fiy1,fiz1;
269     real         jx1,jy1,jz1;
270     real         dx11,dy11,dz11,rsq11,rinv11;
271     real         c6,c12;
272     real         weight_cg1, weight_cg2, weight_product;
273     real         hybscal;
274
275     nri              = *p_nri;         
276     ntype            = *p_ntype;       
277     nthreads         = *p_nthreads;    
278     facel            = *p_facel;       
279     krf              = *p_krf;         
280     crf              = *p_crf;         
281     tabscale         = *p_tabscale;    
282     nouter           = 0;              
283     ninner           = 0;              
284     
285     do
286     {
287         #ifdef GMX_THREAD_SHM_FDECOMP
288         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
289         nn0              = *count;         
290         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
291         *count           = nn1;            
292         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
293         if(nn1>nri) nn1=nri;
294         #else
295         nn0 = 0;
296         nn1 = nri;
297         #endif
298         
299         for(n=nn0; (n<nn1); n++)
300         {
301             is3              = 3*shift[n];     
302             shX              = shiftvec[is3];  
303             shY              = shiftvec[is3+1];
304             shZ              = shiftvec[is3+2];
305             nj0              = jindex[n];      
306             nj1              = jindex[n+1];    
307             ii               = iinr[n];        
308             ii3              = 3*ii;           
309             ix1              = shX + pos[ii3+0];
310             iy1              = shY + pos[ii3+1];
311             iz1              = shZ + pos[ii3+2];
312             iq               = facel*charge[ii];
313             nti              = 2*ntype*type[ii];
314             weight_cg1       = wf[ii];         
315             vctot            = 0;              
316             Vvdwtot          = 0;              
317             fix1             = 0;              
318             fiy1             = 0;              
319             fiz1             = 0;              
320             
321             for(k=nj0; (k<nj1); k++)
322             {
323                 jnr              = jjnr[k];        
324                 weight_cg2       = wf[jnr];        
325                 weight_product   = weight_cg1*weight_cg2;
326                 if (weight_product < ALMOST_ZERO) {
327                 /* force is zero, skip this molecule */
328                  continue;
329                 }
330                 else if (weight_product >= ALMOST_ONE)
331                 {
332                        hybscal = 1.0;
333                 }
334                 else
335                 {
336                    hybscal = weight_product;
337                 }
338                 j3               = 3*jnr;          
339                 jx1              = pos[j3+0];      
340                 jy1              = pos[j3+1];      
341                 jz1              = pos[j3+2];      
342                 dx11             = ix1 - jx1;      
343                 dy11             = iy1 - jy1;      
344                 dz11             = iz1 - jz1;      
345                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
346                 rinv11           = 1.0/sqrt(rsq11);
347                 qq               = iq*charge[jnr]; 
348                 tj               = nti+2*type[jnr];
349                 c6               = vdwparam[tj];   
350                 c12              = vdwparam[tj+1]; 
351                 rinvsq           = rinv11*rinv11;  
352                 vcoul            = qq*rinv11;      
353                 vctot            = vctot+vcoul;    
354                 rinvsix          = rinvsq*rinvsq*rinvsq;
355                 Vvdw6            = c6*rinvsix;     
356                 Vvdw12           = c12*rinvsix*rinvsix;
357                 Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
358                 fscal            = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
359                 fscal *= hybscal;
360                 if(force_cap>0 && (fabs(fscal)> force_cap)){
361                 fscal=force_cap*fscal/fabs(fscal);
362                 }
363                 tx               = fscal*dx11;     
364                 ty               = fscal*dy11;     
365                 tz               = fscal*dz11;     
366                 fix1             = fix1 + tx;      
367                 fiy1             = fiy1 + ty;      
368                 fiz1             = fiz1 + tz;      
369                 faction[j3+0]    = faction[j3+0] - tx;
370                 faction[j3+1]    = faction[j3+1] - ty;
371                 faction[j3+2]    = faction[j3+2] - tz;
372             }
373             
374             faction[ii3+0]   = faction[ii3+0] + fix1;
375             faction[ii3+1]   = faction[ii3+1] + fiy1;
376             faction[ii3+2]   = faction[ii3+2] + fiz1;
377             fshift[is3]      = fshift[is3]+fix1;
378             fshift[is3+1]    = fshift[is3+1]+fiy1;
379             fshift[is3+2]    = fshift[is3+2]+fiz1;
380             ggid             = gid[n];         
381             Vc[ggid]         = Vc[ggid] + vctot;
382             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
383             ninner           = ninner + nj1 - nj0;
384         }
385         
386         nouter           = nouter + nn1 - nn0;
387     }
388     while (nn1<nri);
389     
390     *outeriter       = nouter;         
391     *inneriter       = ninner;         
392 }
393
394