Fix component for libcudart
[alexxy/gromacs.git] / src / gmxlib / nonbonded / nb_kernel_c / nb_kernel_ElecEw_VdwCSTab_GeomP1P1_c.c
1 /*
2  * Note: this file was generated by the Gromacs c kernel generator.
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  * Copyright (c) 2001-2012, The GROMACS Development Team
9  *
10  * Gromacs is a library for molecular simulation and trajectory analysis,
11  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
12  * a full list of developers and information, check out http://www.gromacs.org
13  *
14  * This program is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 2 of the License, or (at your option) any
17  * later version.
18  *
19  * To help fund GROMACS development, we humbly ask that you cite
20  * the papers people have written on it - you can find them on the website.
21  */
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <math.h>
27
28 #include "../nb_kernel.h"
29 #include "types/simple.h"
30 #include "vec.h"
31 #include "nrnb.h"
32
33 /*
34  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c
35  * Electrostatics interaction: Ewald
36  * VdW interaction:            CubicSplineTable
37  * Geometry:                   Particle-Particle
38  * Calculate force/pot:        PotentialAndForce
39  */
40 void
41 nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c
42                     (t_nblist * gmx_restrict                nlist,
43                      rvec * gmx_restrict                    xx,
44                      rvec * gmx_restrict                    ff,
45                      t_forcerec * gmx_restrict              fr,
46                      t_mdatoms * gmx_restrict               mdatoms,
47                      nb_kernel_data_t * gmx_restrict        kernel_data,
48                      t_nrnb * gmx_restrict                  nrnb)
49 {
50     int              i_shift_offset,i_coord_offset,j_coord_offset;
51     int              j_index_start,j_index_end;
52     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
53     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
54     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
55     real             *shiftvec,*fshift,*x,*f;
56     int              vdwioffset0;
57     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
58     int              vdwjidx0;
59     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
60     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
61     real             velec,felec,velecsum,facel,crf,krf,krf2;
62     real             *charge;
63     int              nvdwtype;
64     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
65     int              *vdwtype;
66     real             *vdwparam;
67     int              vfitab;
68     real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
69     real             *vftab;
70     int              ewitab;
71     real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
72     real             *ewtab;
73
74     x                = xx[0];
75     f                = ff[0];
76
77     nri              = nlist->nri;
78     iinr             = nlist->iinr;
79     jindex           = nlist->jindex;
80     jjnr             = nlist->jjnr;
81     shiftidx         = nlist->shift;
82     gid              = nlist->gid;
83     shiftvec         = fr->shift_vec[0];
84     fshift           = fr->fshift[0];
85     facel            = fr->epsfac;
86     charge           = mdatoms->chargeA;
87     nvdwtype         = fr->ntype;
88     vdwparam         = fr->nbfp;
89     vdwtype          = mdatoms->typeA;
90
91     vftab            = kernel_data->table_vdw->data;
92     vftabscale       = kernel_data->table_vdw->scale;
93
94     sh_ewald         = fr->ic->sh_ewald;
95     ewtab            = fr->ic->tabq_coul_FDV0;
96     ewtabscale       = fr->ic->tabq_scale;
97     ewtabhalfspace   = 0.5/ewtabscale;
98
99     outeriter        = 0;
100     inneriter        = 0;
101
102     /* Start outer loop over neighborlists */
103     for(iidx=0; iidx<nri; iidx++)
104     {
105         /* Load shift vector for this list */
106         i_shift_offset   = DIM*shiftidx[iidx];
107         shX              = shiftvec[i_shift_offset+XX];
108         shY              = shiftvec[i_shift_offset+YY];
109         shZ              = shiftvec[i_shift_offset+ZZ];
110
111         /* Load limits for loop over neighbors */
112         j_index_start    = jindex[iidx];
113         j_index_end      = jindex[iidx+1];
114
115         /* Get outer coordinate index */
116         inr              = iinr[iidx];
117         i_coord_offset   = DIM*inr;
118
119         /* Load i particle coords and add shift vector */
120         ix0              = shX + x[i_coord_offset+DIM*0+XX];
121         iy0              = shY + x[i_coord_offset+DIM*0+YY];
122         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
123
124         fix0             = 0.0;
125         fiy0             = 0.0;
126         fiz0             = 0.0;
127
128         /* Load parameters for i particles */
129         iq0              = facel*charge[inr+0];
130         vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
131
132         /* Reset potential sums */
133         velecsum         = 0.0;
134         vvdwsum          = 0.0;
135
136         /* Start inner kernel loop */
137         for(jidx=j_index_start; jidx<j_index_end; jidx++)
138         {
139             /* Get j neighbor index, and coordinate index */
140             jnr              = jjnr[jidx];
141             j_coord_offset   = DIM*jnr;
142
143             /* load j atom coordinates */
144             jx0              = x[j_coord_offset+DIM*0+XX];
145             jy0              = x[j_coord_offset+DIM*0+YY];
146             jz0              = x[j_coord_offset+DIM*0+ZZ];
147
148             /* Calculate displacement vector */
149             dx00             = ix0 - jx0;
150             dy00             = iy0 - jy0;
151             dz00             = iz0 - jz0;
152
153             /* Calculate squared distance and things based on it */
154             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
155
156             rinv00           = gmx_invsqrt(rsq00);
157
158             rinvsq00         = rinv00*rinv00;
159
160             /* Load parameters for j particles */
161             jq0              = charge[jnr+0];
162             vdwjidx0         = 2*vdwtype[jnr+0];
163
164             /**************************
165              * CALCULATE INTERACTIONS *
166              **************************/
167
168             r00              = rsq00*rinv00;
169
170             qq00             = iq0*jq0;
171             c6_00            = vdwparam[vdwioffset0+vdwjidx0];
172             c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
173
174             /* Calculate table index by multiplying r with table scale and truncate to integer */
175             rt               = r00*vftabscale;
176             vfitab           = rt;
177             vfeps            = rt-vfitab;
178             vfitab           = 2*4*vfitab;
179
180             /* EWALD ELECTROSTATICS */
181
182             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
183             ewrt             = r00*ewtabscale;
184             ewitab           = ewrt;
185             eweps            = ewrt-ewitab;
186             ewitab           = 4*ewitab;
187             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
188             velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
189             felec            = qq00*rinv00*(rinvsq00-felec);
190
191             /* CUBIC SPLINE TABLE DISPERSION */
192             vfitab          += 0;
193             Y                = vftab[vfitab];
194             F                = vftab[vfitab+1];
195             Geps             = vfeps*vftab[vfitab+2];
196             Heps2            = vfeps*vfeps*vftab[vfitab+3];
197             Fp               = F+Geps+Heps2;
198             VV               = Y+vfeps*Fp;
199             vvdw6            = c6_00*VV;
200             FF               = Fp+Geps+2.0*Heps2;
201             fvdw6            = c6_00*FF;
202
203             /* CUBIC SPLINE TABLE REPULSION */
204             Y                = vftab[vfitab+4];
205             F                = vftab[vfitab+5];
206             Geps             = vfeps*vftab[vfitab+6];
207             Heps2            = vfeps*vfeps*vftab[vfitab+7];
208             Fp               = F+Geps+Heps2;
209             VV               = Y+vfeps*Fp;
210             vvdw12           = c12_00*VV;
211             FF               = Fp+Geps+2.0*Heps2;
212             fvdw12           = c12_00*FF;
213             vvdw             = vvdw12+vvdw6;
214             fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
215
216             /* Update potential sums from outer loop */
217             velecsum        += velec;
218             vvdwsum         += vvdw;
219
220             fscal            = felec+fvdw;
221
222             /* Calculate temporary vectorial force */
223             tx               = fscal*dx00;
224             ty               = fscal*dy00;
225             tz               = fscal*dz00;
226
227             /* Update vectorial force */
228             fix0            += tx;
229             fiy0            += ty;
230             fiz0            += tz;
231             f[j_coord_offset+DIM*0+XX] -= tx;
232             f[j_coord_offset+DIM*0+YY] -= ty;
233             f[j_coord_offset+DIM*0+ZZ] -= tz;
234
235             /* Inner loop uses 74 flops */
236         }
237         /* End of innermost loop */
238
239         tx = ty = tz = 0;
240         f[i_coord_offset+DIM*0+XX] += fix0;
241         f[i_coord_offset+DIM*0+YY] += fiy0;
242         f[i_coord_offset+DIM*0+ZZ] += fiz0;
243         tx                         += fix0;
244         ty                         += fiy0;
245         tz                         += fiz0;
246         fshift[i_shift_offset+XX]  += tx;
247         fshift[i_shift_offset+YY]  += ty;
248         fshift[i_shift_offset+ZZ]  += tz;
249
250         ggid                        = gid[iidx];
251         /* Update potential energies */
252         kernel_data->energygrp_elec[ggid] += velecsum;
253         kernel_data->energygrp_vdw[ggid] += vvdwsum;
254
255         /* Increment number of inner iterations */
256         inneriter                  += j_index_end - j_index_start;
257
258         /* Outer loop uses 15 flops */
259     }
260
261     /* Increment number of outer iterations */
262     outeriter        += nri;
263
264     /* Update outer/inner flops */
265
266     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*74);
267 }
268 /*
269  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c
270  * Electrostatics interaction: Ewald
271  * VdW interaction:            CubicSplineTable
272  * Geometry:                   Particle-Particle
273  * Calculate force/pot:        Force
274  */
275 void
276 nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c
277                     (t_nblist * gmx_restrict                nlist,
278                      rvec * gmx_restrict                    xx,
279                      rvec * gmx_restrict                    ff,
280                      t_forcerec * gmx_restrict              fr,
281                      t_mdatoms * gmx_restrict               mdatoms,
282                      nb_kernel_data_t * gmx_restrict        kernel_data,
283                      t_nrnb * gmx_restrict                  nrnb)
284 {
285     int              i_shift_offset,i_coord_offset,j_coord_offset;
286     int              j_index_start,j_index_end;
287     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
288     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
289     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
290     real             *shiftvec,*fshift,*x,*f;
291     int              vdwioffset0;
292     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
293     int              vdwjidx0;
294     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
295     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
296     real             velec,felec,velecsum,facel,crf,krf,krf2;
297     real             *charge;
298     int              nvdwtype;
299     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
300     int              *vdwtype;
301     real             *vdwparam;
302     int              vfitab;
303     real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
304     real             *vftab;
305     int              ewitab;
306     real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
307     real             *ewtab;
308
309     x                = xx[0];
310     f                = ff[0];
311
312     nri              = nlist->nri;
313     iinr             = nlist->iinr;
314     jindex           = nlist->jindex;
315     jjnr             = nlist->jjnr;
316     shiftidx         = nlist->shift;
317     gid              = nlist->gid;
318     shiftvec         = fr->shift_vec[0];
319     fshift           = fr->fshift[0];
320     facel            = fr->epsfac;
321     charge           = mdatoms->chargeA;
322     nvdwtype         = fr->ntype;
323     vdwparam         = fr->nbfp;
324     vdwtype          = mdatoms->typeA;
325
326     vftab            = kernel_data->table_vdw->data;
327     vftabscale       = kernel_data->table_vdw->scale;
328
329     sh_ewald         = fr->ic->sh_ewald;
330     ewtab            = fr->ic->tabq_coul_F;
331     ewtabscale       = fr->ic->tabq_scale;
332     ewtabhalfspace   = 0.5/ewtabscale;
333
334     outeriter        = 0;
335     inneriter        = 0;
336
337     /* Start outer loop over neighborlists */
338     for(iidx=0; iidx<nri; iidx++)
339     {
340         /* Load shift vector for this list */
341         i_shift_offset   = DIM*shiftidx[iidx];
342         shX              = shiftvec[i_shift_offset+XX];
343         shY              = shiftvec[i_shift_offset+YY];
344         shZ              = shiftvec[i_shift_offset+ZZ];
345
346         /* Load limits for loop over neighbors */
347         j_index_start    = jindex[iidx];
348         j_index_end      = jindex[iidx+1];
349
350         /* Get outer coordinate index */
351         inr              = iinr[iidx];
352         i_coord_offset   = DIM*inr;
353
354         /* Load i particle coords and add shift vector */
355         ix0              = shX + x[i_coord_offset+DIM*0+XX];
356         iy0              = shY + x[i_coord_offset+DIM*0+YY];
357         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
358
359         fix0             = 0.0;
360         fiy0             = 0.0;
361         fiz0             = 0.0;
362
363         /* Load parameters for i particles */
364         iq0              = facel*charge[inr+0];
365         vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
366
367         /* Start inner kernel loop */
368         for(jidx=j_index_start; jidx<j_index_end; jidx++)
369         {
370             /* Get j neighbor index, and coordinate index */
371             jnr              = jjnr[jidx];
372             j_coord_offset   = DIM*jnr;
373
374             /* load j atom coordinates */
375             jx0              = x[j_coord_offset+DIM*0+XX];
376             jy0              = x[j_coord_offset+DIM*0+YY];
377             jz0              = x[j_coord_offset+DIM*0+ZZ];
378
379             /* Calculate displacement vector */
380             dx00             = ix0 - jx0;
381             dy00             = iy0 - jy0;
382             dz00             = iz0 - jz0;
383
384             /* Calculate squared distance and things based on it */
385             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
386
387             rinv00           = gmx_invsqrt(rsq00);
388
389             rinvsq00         = rinv00*rinv00;
390
391             /* Load parameters for j particles */
392             jq0              = charge[jnr+0];
393             vdwjidx0         = 2*vdwtype[jnr+0];
394
395             /**************************
396              * CALCULATE INTERACTIONS *
397              **************************/
398
399             r00              = rsq00*rinv00;
400
401             qq00             = iq0*jq0;
402             c6_00            = vdwparam[vdwioffset0+vdwjidx0];
403             c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
404
405             /* Calculate table index by multiplying r with table scale and truncate to integer */
406             rt               = r00*vftabscale;
407             vfitab           = rt;
408             vfeps            = rt-vfitab;
409             vfitab           = 2*4*vfitab;
410
411             /* EWALD ELECTROSTATICS */
412
413             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
414             ewrt             = r00*ewtabscale;
415             ewitab           = ewrt;
416             eweps            = ewrt-ewitab;
417             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
418             felec            = qq00*rinv00*(rinvsq00-felec);
419
420             /* CUBIC SPLINE TABLE DISPERSION */
421             vfitab          += 0;
422             Y                = vftab[vfitab];
423             F                = vftab[vfitab+1];
424             Geps             = vfeps*vftab[vfitab+2];
425             Heps2            = vfeps*vfeps*vftab[vfitab+3];
426             Fp               = F+Geps+Heps2;
427             FF               = Fp+Geps+2.0*Heps2;
428             fvdw6            = c6_00*FF;
429
430             /* CUBIC SPLINE TABLE REPULSION */
431             Y                = vftab[vfitab+4];
432             F                = vftab[vfitab+5];
433             Geps             = vfeps*vftab[vfitab+6];
434             Heps2            = vfeps*vfeps*vftab[vfitab+7];
435             Fp               = F+Geps+Heps2;
436             FF               = Fp+Geps+2.0*Heps2;
437             fvdw12           = c12_00*FF;
438             fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
439
440             fscal            = felec+fvdw;
441
442             /* Calculate temporary vectorial force */
443             tx               = fscal*dx00;
444             ty               = fscal*dy00;
445             tz               = fscal*dz00;
446
447             /* Update vectorial force */
448             fix0            += tx;
449             fiy0            += ty;
450             fiz0            += tz;
451             f[j_coord_offset+DIM*0+XX] -= tx;
452             f[j_coord_offset+DIM*0+YY] -= ty;
453             f[j_coord_offset+DIM*0+ZZ] -= tz;
454
455             /* Inner loop uses 59 flops */
456         }
457         /* End of innermost loop */
458
459         tx = ty = tz = 0;
460         f[i_coord_offset+DIM*0+XX] += fix0;
461         f[i_coord_offset+DIM*0+YY] += fiy0;
462         f[i_coord_offset+DIM*0+ZZ] += fiz0;
463         tx                         += fix0;
464         ty                         += fiy0;
465         tz                         += fiz0;
466         fshift[i_shift_offset+XX]  += tx;
467         fshift[i_shift_offset+YY]  += ty;
468         fshift[i_shift_offset+ZZ]  += tz;
469
470         /* Increment number of inner iterations */
471         inneriter                  += j_index_end - j_index_start;
472
473         /* Outer loop uses 13 flops */
474     }
475
476     /* Increment number of outer iterations */
477     outeriter        += nri;
478
479     /* Update outer/inner flops */
480
481     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*59);
482 }