Fix component for libcudart
[alexxy/gromacs.git] / src / gmxlib / nonbonded / nb_kernel_c / nb_kernel_ElecEw_VdwBham_GeomW4P1_c.c
/*
 * Note: this file was generated by the Gromacs c kernel generator.
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 * Copyright (c) 2001-2012, The GROMACS Development Team
 *
 * Gromacs is a library for molecular simulation and trajectory analysis,
 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
 * a full list of developers and information, check out http://www.gromacs.org
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option) any
 * later version.
 *
 * To help fund GROMACS development, we humbly ask that you cite
 * the papers people have written on it - you can find them on the website.
 */
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <math.h>
27
28 #include "../nb_kernel.h"
29 #include "types/simple.h"
30 #include "vec.h"
31 #include "nrnb.h"
32
33 /*
34  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c
35  * Electrostatics interaction: Ewald
36  * VdW interaction:            Buckingham
37  * Geometry:                   Water4-Particle
38  * Calculate force/pot:        PotentialAndForce
39  */
40 void
41 nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c
42                     (t_nblist * gmx_restrict                nlist,
43                      rvec * gmx_restrict                    xx,
44                      rvec * gmx_restrict                    ff,
45                      t_forcerec * gmx_restrict              fr,
46                      t_mdatoms * gmx_restrict               mdatoms,
47                      nb_kernel_data_t * gmx_restrict        kernel_data,
48                      t_nrnb * gmx_restrict                  nrnb)
49 {
50     int              i_shift_offset,i_coord_offset,j_coord_offset;
51     int              j_index_start,j_index_end;
52     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
53     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
54     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
55     real             *shiftvec,*fshift,*x,*f;
56     int              vdwioffset0;
57     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
58     int              vdwioffset1;
59     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
60     int              vdwioffset2;
61     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
62     int              vdwioffset3;
63     real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
64     int              vdwjidx0;
65     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
66     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
67     real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
68     real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
69     real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
70     real             velec,felec,velecsum,facel,crf,krf,krf2;
71     real             *charge;
72     int              nvdwtype;
73     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
74     int              *vdwtype;
75     real             *vdwparam;
76     int              ewitab;
77     real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
78     real             *ewtab;
79
80     x                = xx[0];
81     f                = ff[0];
82
83     nri              = nlist->nri;
84     iinr             = nlist->iinr;
85     jindex           = nlist->jindex;
86     jjnr             = nlist->jjnr;
87     shiftidx         = nlist->shift;
88     gid              = nlist->gid;
89     shiftvec         = fr->shift_vec[0];
90     fshift           = fr->fshift[0];
91     facel            = fr->epsfac;
92     charge           = mdatoms->chargeA;
93     nvdwtype         = fr->ntype;
94     vdwparam         = fr->nbfp;
95     vdwtype          = mdatoms->typeA;
96
97     sh_ewald         = fr->ic->sh_ewald;
98     ewtab            = fr->ic->tabq_coul_FDV0;
99     ewtabscale       = fr->ic->tabq_scale;
100     ewtabhalfspace   = 0.5/ewtabscale;
101
102     /* Setup water-specific parameters */
103     inr              = nlist->iinr[0];
104     iq1              = facel*charge[inr+1];
105     iq2              = facel*charge[inr+2];
106     iq3              = facel*charge[inr+3];
107     vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
108
109     outeriter        = 0;
110     inneriter        = 0;
111
112     /* Start outer loop over neighborlists */
113     for(iidx=0; iidx<nri; iidx++)
114     {
115         /* Load shift vector for this list */
116         i_shift_offset   = DIM*shiftidx[iidx];
117         shX              = shiftvec[i_shift_offset+XX];
118         shY              = shiftvec[i_shift_offset+YY];
119         shZ              = shiftvec[i_shift_offset+ZZ];
120
121         /* Load limits for loop over neighbors */
122         j_index_start    = jindex[iidx];
123         j_index_end      = jindex[iidx+1];
124
125         /* Get outer coordinate index */
126         inr              = iinr[iidx];
127         i_coord_offset   = DIM*inr;
128
129         /* Load i particle coords and add shift vector */
130         ix0              = shX + x[i_coord_offset+DIM*0+XX];
131         iy0              = shY + x[i_coord_offset+DIM*0+YY];
132         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
133         ix1              = shX + x[i_coord_offset+DIM*1+XX];
134         iy1              = shY + x[i_coord_offset+DIM*1+YY];
135         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
136         ix2              = shX + x[i_coord_offset+DIM*2+XX];
137         iy2              = shY + x[i_coord_offset+DIM*2+YY];
138         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
139         ix3              = shX + x[i_coord_offset+DIM*3+XX];
140         iy3              = shY + x[i_coord_offset+DIM*3+YY];
141         iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
142
143         fix0             = 0.0;
144         fiy0             = 0.0;
145         fiz0             = 0.0;
146         fix1             = 0.0;
147         fiy1             = 0.0;
148         fiz1             = 0.0;
149         fix2             = 0.0;
150         fiy2             = 0.0;
151         fiz2             = 0.0;
152         fix3             = 0.0;
153         fiy3             = 0.0;
154         fiz3             = 0.0;
155
156         /* Reset potential sums */
157         velecsum         = 0.0;
158         vvdwsum          = 0.0;
159
160         /* Start inner kernel loop */
161         for(jidx=j_index_start; jidx<j_index_end; jidx++)
162         {
163             /* Get j neighbor index, and coordinate index */
164             jnr              = jjnr[jidx];
165             j_coord_offset   = DIM*jnr;
166
167             /* load j atom coordinates */
168             jx0              = x[j_coord_offset+DIM*0+XX];
169             jy0              = x[j_coord_offset+DIM*0+YY];
170             jz0              = x[j_coord_offset+DIM*0+ZZ];
171
172             /* Calculate displacement vector */
173             dx00             = ix0 - jx0;
174             dy00             = iy0 - jy0;
175             dz00             = iz0 - jz0;
176             dx10             = ix1 - jx0;
177             dy10             = iy1 - jy0;
178             dz10             = iz1 - jz0;
179             dx20             = ix2 - jx0;
180             dy20             = iy2 - jy0;
181             dz20             = iz2 - jz0;
182             dx30             = ix3 - jx0;
183             dy30             = iy3 - jy0;
184             dz30             = iz3 - jz0;
185
186             /* Calculate squared distance and things based on it */
187             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
188             rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
189             rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
190             rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
191
192             rinv00           = gmx_invsqrt(rsq00);
193             rinv10           = gmx_invsqrt(rsq10);
194             rinv20           = gmx_invsqrt(rsq20);
195             rinv30           = gmx_invsqrt(rsq30);
196
197             rinvsq00         = rinv00*rinv00;
198             rinvsq10         = rinv10*rinv10;
199             rinvsq20         = rinv20*rinv20;
200             rinvsq30         = rinv30*rinv30;
201
202             /* Load parameters for j particles */
203             jq0              = charge[jnr+0];
204             vdwjidx0         = 3*vdwtype[jnr+0];
205
206             /**************************
207              * CALCULATE INTERACTIONS *
208              **************************/
209
210             r00              = rsq00*rinv00;
211
212             c6_00            = vdwparam[vdwioffset0+vdwjidx0];
213             cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
214             cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
215
216             /* BUCKINGHAM DISPERSION/REPULSION */
217             rinvsix          = rinvsq00*rinvsq00*rinvsq00;
218             vvdw6            = c6_00*rinvsix;
219             br               = cexp2_00*r00;
220             vvdwexp          = cexp1_00*exp(-br);
221             vvdw             = vvdwexp - vvdw6*(1.0/6.0);
222             fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
223
224             /* Update potential sums from outer loop */
225             vvdwsum         += vvdw;
226
227             fscal            = fvdw;
228
229             /* Calculate temporary vectorial force */
230             tx               = fscal*dx00;
231             ty               = fscal*dy00;
232             tz               = fscal*dz00;
233
234             /* Update vectorial force */
235             fix0            += tx;
236             fiy0            += ty;
237             fiz0            += tz;
238             f[j_coord_offset+DIM*0+XX] -= tx;
239             f[j_coord_offset+DIM*0+YY] -= ty;
240             f[j_coord_offset+DIM*0+ZZ] -= tz;
241
242             /**************************
243              * CALCULATE INTERACTIONS *
244              **************************/
245
246             r10              = rsq10*rinv10;
247
248             qq10             = iq1*jq0;
249
250             /* EWALD ELECTROSTATICS */
251
252             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
253             ewrt             = r10*ewtabscale;
254             ewitab           = ewrt;
255             eweps            = ewrt-ewitab;
256             ewitab           = 4*ewitab;
257             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
258             velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
259             felec            = qq10*rinv10*(rinvsq10-felec);
260
261             /* Update potential sums from outer loop */
262             velecsum        += velec;
263
264             fscal            = felec;
265
266             /* Calculate temporary vectorial force */
267             tx               = fscal*dx10;
268             ty               = fscal*dy10;
269             tz               = fscal*dz10;
270
271             /* Update vectorial force */
272             fix1            += tx;
273             fiy1            += ty;
274             fiz1            += tz;
275             f[j_coord_offset+DIM*0+XX] -= tx;
276             f[j_coord_offset+DIM*0+YY] -= ty;
277             f[j_coord_offset+DIM*0+ZZ] -= tz;
278
279             /**************************
280              * CALCULATE INTERACTIONS *
281              **************************/
282
283             r20              = rsq20*rinv20;
284
285             qq20             = iq2*jq0;
286
287             /* EWALD ELECTROSTATICS */
288
289             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
290             ewrt             = r20*ewtabscale;
291             ewitab           = ewrt;
292             eweps            = ewrt-ewitab;
293             ewitab           = 4*ewitab;
294             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
295             velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
296             felec            = qq20*rinv20*(rinvsq20-felec);
297
298             /* Update potential sums from outer loop */
299             velecsum        += velec;
300
301             fscal            = felec;
302
303             /* Calculate temporary vectorial force */
304             tx               = fscal*dx20;
305             ty               = fscal*dy20;
306             tz               = fscal*dz20;
307
308             /* Update vectorial force */
309             fix2            += tx;
310             fiy2            += ty;
311             fiz2            += tz;
312             f[j_coord_offset+DIM*0+XX] -= tx;
313             f[j_coord_offset+DIM*0+YY] -= ty;
314             f[j_coord_offset+DIM*0+ZZ] -= tz;
315
316             /**************************
317              * CALCULATE INTERACTIONS *
318              **************************/
319
320             r30              = rsq30*rinv30;
321
322             qq30             = iq3*jq0;
323
324             /* EWALD ELECTROSTATICS */
325
326             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
327             ewrt             = r30*ewtabscale;
328             ewitab           = ewrt;
329             eweps            = ewrt-ewitab;
330             ewitab           = 4*ewitab;
331             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
332             velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
333             felec            = qq30*rinv30*(rinvsq30-felec);
334
335             /* Update potential sums from outer loop */
336             velecsum        += velec;
337
338             fscal            = felec;
339
340             /* Calculate temporary vectorial force */
341             tx               = fscal*dx30;
342             ty               = fscal*dy30;
343             tz               = fscal*dz30;
344
345             /* Update vectorial force */
346             fix3            += tx;
347             fiy3            += ty;
348             fiz3            += tz;
349             f[j_coord_offset+DIM*0+XX] -= tx;
350             f[j_coord_offset+DIM*0+YY] -= ty;
351             f[j_coord_offset+DIM*0+ZZ] -= tz;
352
353             /* Inner loop uses 184 flops */
354         }
355         /* End of innermost loop */
356
357         tx = ty = tz = 0;
358         f[i_coord_offset+DIM*0+XX] += fix0;
359         f[i_coord_offset+DIM*0+YY] += fiy0;
360         f[i_coord_offset+DIM*0+ZZ] += fiz0;
361         tx                         += fix0;
362         ty                         += fiy0;
363         tz                         += fiz0;
364         f[i_coord_offset+DIM*1+XX] += fix1;
365         f[i_coord_offset+DIM*1+YY] += fiy1;
366         f[i_coord_offset+DIM*1+ZZ] += fiz1;
367         tx                         += fix1;
368         ty                         += fiy1;
369         tz                         += fiz1;
370         f[i_coord_offset+DIM*2+XX] += fix2;
371         f[i_coord_offset+DIM*2+YY] += fiy2;
372         f[i_coord_offset+DIM*2+ZZ] += fiz2;
373         tx                         += fix2;
374         ty                         += fiy2;
375         tz                         += fiz2;
376         f[i_coord_offset+DIM*3+XX] += fix3;
377         f[i_coord_offset+DIM*3+YY] += fiy3;
378         f[i_coord_offset+DIM*3+ZZ] += fiz3;
379         tx                         += fix3;
380         ty                         += fiy3;
381         tz                         += fiz3;
382         fshift[i_shift_offset+XX]  += tx;
383         fshift[i_shift_offset+YY]  += ty;
384         fshift[i_shift_offset+ZZ]  += tz;
385
386         ggid                        = gid[iidx];
387         /* Update potential energies */
388         kernel_data->energygrp_elec[ggid] += velecsum;
389         kernel_data->energygrp_vdw[ggid] += vvdwsum;
390
391         /* Increment number of inner iterations */
392         inneriter                  += j_index_end - j_index_start;
393
394         /* Outer loop uses 41 flops */
395     }
396
397     /* Increment number of outer iterations */
398     outeriter        += nri;
399
400     /* Update outer/inner flops */
401
402     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*184);
403 }
404 /*
405  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c
406  * Electrostatics interaction: Ewald
407  * VdW interaction:            Buckingham
408  * Geometry:                   Water4-Particle
409  * Calculate force/pot:        Force
410  */
411 void
412 nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c
413                     (t_nblist * gmx_restrict                nlist,
414                      rvec * gmx_restrict                    xx,
415                      rvec * gmx_restrict                    ff,
416                      t_forcerec * gmx_restrict              fr,
417                      t_mdatoms * gmx_restrict               mdatoms,
418                      nb_kernel_data_t * gmx_restrict        kernel_data,
419                      t_nrnb * gmx_restrict                  nrnb)
420 {
421     int              i_shift_offset,i_coord_offset,j_coord_offset;
422     int              j_index_start,j_index_end;
423     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
424     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
425     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
426     real             *shiftvec,*fshift,*x,*f;
427     int              vdwioffset0;
428     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
429     int              vdwioffset1;
430     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
431     int              vdwioffset2;
432     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
433     int              vdwioffset3;
434     real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
435     int              vdwjidx0;
436     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
437     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
438     real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
439     real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
440     real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
441     real             velec,felec,velecsum,facel,crf,krf,krf2;
442     real             *charge;
443     int              nvdwtype;
444     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
445     int              *vdwtype;
446     real             *vdwparam;
447     int              ewitab;
448     real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
449     real             *ewtab;
450
451     x                = xx[0];
452     f                = ff[0];
453
454     nri              = nlist->nri;
455     iinr             = nlist->iinr;
456     jindex           = nlist->jindex;
457     jjnr             = nlist->jjnr;
458     shiftidx         = nlist->shift;
459     gid              = nlist->gid;
460     shiftvec         = fr->shift_vec[0];
461     fshift           = fr->fshift[0];
462     facel            = fr->epsfac;
463     charge           = mdatoms->chargeA;
464     nvdwtype         = fr->ntype;
465     vdwparam         = fr->nbfp;
466     vdwtype          = mdatoms->typeA;
467
468     sh_ewald         = fr->ic->sh_ewald;
469     ewtab            = fr->ic->tabq_coul_F;
470     ewtabscale       = fr->ic->tabq_scale;
471     ewtabhalfspace   = 0.5/ewtabscale;
472
473     /* Setup water-specific parameters */
474     inr              = nlist->iinr[0];
475     iq1              = facel*charge[inr+1];
476     iq2              = facel*charge[inr+2];
477     iq3              = facel*charge[inr+3];
478     vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
479
480     outeriter        = 0;
481     inneriter        = 0;
482
483     /* Start outer loop over neighborlists */
484     for(iidx=0; iidx<nri; iidx++)
485     {
486         /* Load shift vector for this list */
487         i_shift_offset   = DIM*shiftidx[iidx];
488         shX              = shiftvec[i_shift_offset+XX];
489         shY              = shiftvec[i_shift_offset+YY];
490         shZ              = shiftvec[i_shift_offset+ZZ];
491
492         /* Load limits for loop over neighbors */
493         j_index_start    = jindex[iidx];
494         j_index_end      = jindex[iidx+1];
495
496         /* Get outer coordinate index */
497         inr              = iinr[iidx];
498         i_coord_offset   = DIM*inr;
499
500         /* Load i particle coords and add shift vector */
501         ix0              = shX + x[i_coord_offset+DIM*0+XX];
502         iy0              = shY + x[i_coord_offset+DIM*0+YY];
503         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
504         ix1              = shX + x[i_coord_offset+DIM*1+XX];
505         iy1              = shY + x[i_coord_offset+DIM*1+YY];
506         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
507         ix2              = shX + x[i_coord_offset+DIM*2+XX];
508         iy2              = shY + x[i_coord_offset+DIM*2+YY];
509         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
510         ix3              = shX + x[i_coord_offset+DIM*3+XX];
511         iy3              = shY + x[i_coord_offset+DIM*3+YY];
512         iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
513
514         fix0             = 0.0;
515         fiy0             = 0.0;
516         fiz0             = 0.0;
517         fix1             = 0.0;
518         fiy1             = 0.0;
519         fiz1             = 0.0;
520         fix2             = 0.0;
521         fiy2             = 0.0;
522         fiz2             = 0.0;
523         fix3             = 0.0;
524         fiy3             = 0.0;
525         fiz3             = 0.0;
526
527         /* Start inner kernel loop */
528         for(jidx=j_index_start; jidx<j_index_end; jidx++)
529         {
530             /* Get j neighbor index, and coordinate index */
531             jnr              = jjnr[jidx];
532             j_coord_offset   = DIM*jnr;
533
534             /* load j atom coordinates */
535             jx0              = x[j_coord_offset+DIM*0+XX];
536             jy0              = x[j_coord_offset+DIM*0+YY];
537             jz0              = x[j_coord_offset+DIM*0+ZZ];
538
539             /* Calculate displacement vector */
540             dx00             = ix0 - jx0;
541             dy00             = iy0 - jy0;
542             dz00             = iz0 - jz0;
543             dx10             = ix1 - jx0;
544             dy10             = iy1 - jy0;
545             dz10             = iz1 - jz0;
546             dx20             = ix2 - jx0;
547             dy20             = iy2 - jy0;
548             dz20             = iz2 - jz0;
549             dx30             = ix3 - jx0;
550             dy30             = iy3 - jy0;
551             dz30             = iz3 - jz0;
552
553             /* Calculate squared distance and things based on it */
554             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
555             rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
556             rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
557             rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
558
559             rinv00           = gmx_invsqrt(rsq00);
560             rinv10           = gmx_invsqrt(rsq10);
561             rinv20           = gmx_invsqrt(rsq20);
562             rinv30           = gmx_invsqrt(rsq30);
563
564             rinvsq00         = rinv00*rinv00;
565             rinvsq10         = rinv10*rinv10;
566             rinvsq20         = rinv20*rinv20;
567             rinvsq30         = rinv30*rinv30;
568
569             /* Load parameters for j particles */
570             jq0              = charge[jnr+0];
571             vdwjidx0         = 3*vdwtype[jnr+0];
572
573             /**************************
574              * CALCULATE INTERACTIONS *
575              **************************/
576
577             r00              = rsq00*rinv00;
578
579             c6_00            = vdwparam[vdwioffset0+vdwjidx0];
580             cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
581             cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
582
583             /* BUCKINGHAM DISPERSION/REPULSION */
584             rinvsix          = rinvsq00*rinvsq00*rinvsq00;
585             vvdw6            = c6_00*rinvsix;
586             br               = cexp2_00*r00;
587             vvdwexp          = cexp1_00*exp(-br);
588             fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
589
590             fscal            = fvdw;
591
592             /* Calculate temporary vectorial force */
593             tx               = fscal*dx00;
594             ty               = fscal*dy00;
595             tz               = fscal*dz00;
596
597             /* Update vectorial force */
598             fix0            += tx;
599             fiy0            += ty;
600             fiz0            += tz;
601             f[j_coord_offset+DIM*0+XX] -= tx;
602             f[j_coord_offset+DIM*0+YY] -= ty;
603             f[j_coord_offset+DIM*0+ZZ] -= tz;
604
605             /**************************
606              * CALCULATE INTERACTIONS *
607              **************************/
608
609             r10              = rsq10*rinv10;
610
611             qq10             = iq1*jq0;
612
613             /* EWALD ELECTROSTATICS */
614
615             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
616             ewrt             = r10*ewtabscale;
617             ewitab           = ewrt;
618             eweps            = ewrt-ewitab;
619             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
620             felec            = qq10*rinv10*(rinvsq10-felec);
621
622             fscal            = felec;
623
624             /* Calculate temporary vectorial force */
625             tx               = fscal*dx10;
626             ty               = fscal*dy10;
627             tz               = fscal*dz10;
628
629             /* Update vectorial force */
630             fix1            += tx;
631             fiy1            += ty;
632             fiz1            += tz;
633             f[j_coord_offset+DIM*0+XX] -= tx;
634             f[j_coord_offset+DIM*0+YY] -= ty;
635             f[j_coord_offset+DIM*0+ZZ] -= tz;
636
637             /**************************
638              * CALCULATE INTERACTIONS *
639              **************************/
640
641             r20              = rsq20*rinv20;
642
643             qq20             = iq2*jq0;
644
645             /* EWALD ELECTROSTATICS */
646
647             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
648             ewrt             = r20*ewtabscale;
649             ewitab           = ewrt;
650             eweps            = ewrt-ewitab;
651             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
652             felec            = qq20*rinv20*(rinvsq20-felec);
653
654             fscal            = felec;
655
656             /* Calculate temporary vectorial force */
657             tx               = fscal*dx20;
658             ty               = fscal*dy20;
659             tz               = fscal*dz20;
660
661             /* Update vectorial force */
662             fix2            += tx;
663             fiy2            += ty;
664             fiz2            += tz;
665             f[j_coord_offset+DIM*0+XX] -= tx;
666             f[j_coord_offset+DIM*0+YY] -= ty;
667             f[j_coord_offset+DIM*0+ZZ] -= tz;
668
669             /**************************
670              * CALCULATE INTERACTIONS *
671              **************************/
672
673             r30              = rsq30*rinv30;
674
675             qq30             = iq3*jq0;
676
677             /* EWALD ELECTROSTATICS */
678
679             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
680             ewrt             = r30*ewtabscale;
681             ewitab           = ewrt;
682             eweps            = ewrt-ewitab;
683             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
684             felec            = qq30*rinv30*(rinvsq30-felec);
685
686             fscal            = felec;
687
688             /* Calculate temporary vectorial force */
689             tx               = fscal*dx30;
690             ty               = fscal*dy30;
691             tz               = fscal*dz30;
692
693             /* Update vectorial force */
694             fix3            += tx;
695             fiy3            += ty;
696             fiz3            += tz;
697             f[j_coord_offset+DIM*0+XX] -= tx;
698             f[j_coord_offset+DIM*0+YY] -= ty;
699             f[j_coord_offset+DIM*0+ZZ] -= tz;
700
701             /* Inner loop uses 160 flops */
702         }
703         /* End of innermost loop */
704
705         tx = ty = tz = 0;
706         f[i_coord_offset+DIM*0+XX] += fix0;
707         f[i_coord_offset+DIM*0+YY] += fiy0;
708         f[i_coord_offset+DIM*0+ZZ] += fiz0;
709         tx                         += fix0;
710         ty                         += fiy0;
711         tz                         += fiz0;
712         f[i_coord_offset+DIM*1+XX] += fix1;
713         f[i_coord_offset+DIM*1+YY] += fiy1;
714         f[i_coord_offset+DIM*1+ZZ] += fiz1;
715         tx                         += fix1;
716         ty                         += fiy1;
717         tz                         += fiz1;
718         f[i_coord_offset+DIM*2+XX] += fix2;
719         f[i_coord_offset+DIM*2+YY] += fiy2;
720         f[i_coord_offset+DIM*2+ZZ] += fiz2;
721         tx                         += fix2;
722         ty                         += fiy2;
723         tz                         += fiz2;
724         f[i_coord_offset+DIM*3+XX] += fix3;
725         f[i_coord_offset+DIM*3+YY] += fiy3;
726         f[i_coord_offset+DIM*3+ZZ] += fiz3;
727         tx                         += fix3;
728         ty                         += fiy3;
729         tz                         += fiz3;
730         fshift[i_shift_offset+XX]  += tx;
731         fshift[i_shift_offset+YY]  += ty;
732         fshift[i_shift_offset+ZZ]  += tz;
733
734         /* Increment number of inner iterations */
735         inneriter                  += j_index_end - j_index_start;
736
737         /* Outer loop uses 39 flops */
738     }
739
740     /* Increment number of outer iterations */
741     outeriter        += nri;
742
743     /* Update outer/inner flops */
744
745     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*160);
746 }