65a1f32a72f9e96c5c4c0d37816ab4ca6a9276cd
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_c / nb_kernel_ElecEw_VdwCSTab_GeomW4P1_c.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS c kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "nrnb.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c
51  * Electrostatics interaction: Ewald
52  * VdW interaction:            CubicSplineTable
53  * Geometry:                   Water4-Particle
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     int              i_shift_offset,i_coord_offset,j_coord_offset;
67     int              j_index_start,j_index_end;
68     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
69     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
70     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
71     real             *shiftvec,*fshift,*x,*f;
72     int              vdwioffset0;
73     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
74     int              vdwioffset1;
75     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
76     int              vdwioffset2;
77     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
78     int              vdwioffset3;
79     real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
80     int              vdwjidx0;
81     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
82     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
83     real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
84     real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
85     real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
86     real             velec,felec,velecsum,facel,crf,krf,krf2;
87     real             *charge;
88     int              nvdwtype;
89     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
90     int              *vdwtype;
91     real             *vdwparam;
92     int              vfitab;
93     real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
94     real             *vftab;
95     int              ewitab;
96     real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
97     real             *ewtab;
98
99     x                = xx[0];
100     f                = ff[0];
101
102     nri              = nlist->nri;
103     iinr             = nlist->iinr;
104     jindex           = nlist->jindex;
105     jjnr             = nlist->jjnr;
106     shiftidx         = nlist->shift;
107     gid              = nlist->gid;
108     shiftvec         = fr->shift_vec[0];
109     fshift           = fr->fshift[0];
110     facel            = fr->epsfac;
111     charge           = mdatoms->chargeA;
112     nvdwtype         = fr->ntype;
113     vdwparam         = fr->nbfp;
114     vdwtype          = mdatoms->typeA;
115
116     vftab            = kernel_data->table_vdw->data;
117     vftabscale       = kernel_data->table_vdw->scale;
118
119     sh_ewald         = fr->ic->sh_ewald;
120     ewtab            = fr->ic->tabq_coul_FDV0;
121     ewtabscale       = fr->ic->tabq_scale;
122     ewtabhalfspace   = 0.5/ewtabscale;
123
124     /* Setup water-specific parameters */
125     inr              = nlist->iinr[0];
126     iq1              = facel*charge[inr+1];
127     iq2              = facel*charge[inr+2];
128     iq3              = facel*charge[inr+3];
129     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
130
131     outeriter        = 0;
132     inneriter        = 0;
133
134     /* Start outer loop over neighborlists */
135     for(iidx=0; iidx<nri; iidx++)
136     {
137         /* Load shift vector for this list */
138         i_shift_offset   = DIM*shiftidx[iidx];
139         shX              = shiftvec[i_shift_offset+XX];
140         shY              = shiftvec[i_shift_offset+YY];
141         shZ              = shiftvec[i_shift_offset+ZZ];
142
143         /* Load limits for loop over neighbors */
144         j_index_start    = jindex[iidx];
145         j_index_end      = jindex[iidx+1];
146
147         /* Get outer coordinate index */
148         inr              = iinr[iidx];
149         i_coord_offset   = DIM*inr;
150
151         /* Load i particle coords and add shift vector */
152         ix0              = shX + x[i_coord_offset+DIM*0+XX];
153         iy0              = shY + x[i_coord_offset+DIM*0+YY];
154         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
155         ix1              = shX + x[i_coord_offset+DIM*1+XX];
156         iy1              = shY + x[i_coord_offset+DIM*1+YY];
157         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
158         ix2              = shX + x[i_coord_offset+DIM*2+XX];
159         iy2              = shY + x[i_coord_offset+DIM*2+YY];
160         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
161         ix3              = shX + x[i_coord_offset+DIM*3+XX];
162         iy3              = shY + x[i_coord_offset+DIM*3+YY];
163         iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
164
165         fix0             = 0.0;
166         fiy0             = 0.0;
167         fiz0             = 0.0;
168         fix1             = 0.0;
169         fiy1             = 0.0;
170         fiz1             = 0.0;
171         fix2             = 0.0;
172         fiy2             = 0.0;
173         fiz2             = 0.0;
174         fix3             = 0.0;
175         fiy3             = 0.0;
176         fiz3             = 0.0;
177
178         /* Reset potential sums */
179         velecsum         = 0.0;
180         vvdwsum          = 0.0;
181
182         /* Start inner kernel loop */
183         for(jidx=j_index_start; jidx<j_index_end; jidx++)
184         {
185             /* Get j neighbor index, and coordinate index */
186             jnr              = jjnr[jidx];
187             j_coord_offset   = DIM*jnr;
188
189             /* load j atom coordinates */
190             jx0              = x[j_coord_offset+DIM*0+XX];
191             jy0              = x[j_coord_offset+DIM*0+YY];
192             jz0              = x[j_coord_offset+DIM*0+ZZ];
193
194             /* Calculate displacement vector */
195             dx00             = ix0 - jx0;
196             dy00             = iy0 - jy0;
197             dz00             = iz0 - jz0;
198             dx10             = ix1 - jx0;
199             dy10             = iy1 - jy0;
200             dz10             = iz1 - jz0;
201             dx20             = ix2 - jx0;
202             dy20             = iy2 - jy0;
203             dz20             = iz2 - jz0;
204             dx30             = ix3 - jx0;
205             dy30             = iy3 - jy0;
206             dz30             = iz3 - jz0;
207
208             /* Calculate squared distance and things based on it */
209             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
210             rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
211             rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
212             rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
213
214             rinv00           = gmx_invsqrt(rsq00);
215             rinv10           = gmx_invsqrt(rsq10);
216             rinv20           = gmx_invsqrt(rsq20);
217             rinv30           = gmx_invsqrt(rsq30);
218
219             rinvsq10         = rinv10*rinv10;
220             rinvsq20         = rinv20*rinv20;
221             rinvsq30         = rinv30*rinv30;
222
223             /* Load parameters for j particles */
224             jq0              = charge[jnr+0];
225             vdwjidx0         = 2*vdwtype[jnr+0];
226
227             /**************************
228              * CALCULATE INTERACTIONS *
229              **************************/
230
231             r00              = rsq00*rinv00;
232
233             c6_00            = vdwparam[vdwioffset0+vdwjidx0];
234             c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
235
236             /* Calculate table index by multiplying r with table scale and truncate to integer */
237             rt               = r00*vftabscale;
238             vfitab           = rt;
239             vfeps            = rt-vfitab;
240             vfitab           = 2*4*vfitab;
241
242             /* CUBIC SPLINE TABLE DISPERSION */
243             vfitab          += 0;
244             Y                = vftab[vfitab];
245             F                = vftab[vfitab+1];
246             Geps             = vfeps*vftab[vfitab+2];
247             Heps2            = vfeps*vfeps*vftab[vfitab+3];
248             Fp               = F+Geps+Heps2;
249             VV               = Y+vfeps*Fp;
250             vvdw6            = c6_00*VV;
251             FF               = Fp+Geps+2.0*Heps2;
252             fvdw6            = c6_00*FF;
253
254             /* CUBIC SPLINE TABLE REPULSION */
255             Y                = vftab[vfitab+4];
256             F                = vftab[vfitab+5];
257             Geps             = vfeps*vftab[vfitab+6];
258             Heps2            = vfeps*vfeps*vftab[vfitab+7];
259             Fp               = F+Geps+Heps2;
260             VV               = Y+vfeps*Fp;
261             vvdw12           = c12_00*VV;
262             FF               = Fp+Geps+2.0*Heps2;
263             fvdw12           = c12_00*FF;
264             vvdw             = vvdw12+vvdw6;
265             fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
266
267             /* Update potential sums from outer loop */
268             vvdwsum         += vvdw;
269
270             fscal            = fvdw;
271
272             /* Calculate temporary vectorial force */
273             tx               = fscal*dx00;
274             ty               = fscal*dy00;
275             tz               = fscal*dz00;
276
277             /* Update vectorial force */
278             fix0            += tx;
279             fiy0            += ty;
280             fiz0            += tz;
281             f[j_coord_offset+DIM*0+XX] -= tx;
282             f[j_coord_offset+DIM*0+YY] -= ty;
283             f[j_coord_offset+DIM*0+ZZ] -= tz;
284
285             /**************************
286              * CALCULATE INTERACTIONS *
287              **************************/
288
289             r10              = rsq10*rinv10;
290
291             qq10             = iq1*jq0;
292
293             /* EWALD ELECTROSTATICS */
294
295             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
296             ewrt             = r10*ewtabscale;
297             ewitab           = ewrt;
298             eweps            = ewrt-ewitab;
299             ewitab           = 4*ewitab;
300             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
301             velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
302             felec            = qq10*rinv10*(rinvsq10-felec);
303
304             /* Update potential sums from outer loop */
305             velecsum        += velec;
306
307             fscal            = felec;
308
309             /* Calculate temporary vectorial force */
310             tx               = fscal*dx10;
311             ty               = fscal*dy10;
312             tz               = fscal*dz10;
313
314             /* Update vectorial force */
315             fix1            += tx;
316             fiy1            += ty;
317             fiz1            += tz;
318             f[j_coord_offset+DIM*0+XX] -= tx;
319             f[j_coord_offset+DIM*0+YY] -= ty;
320             f[j_coord_offset+DIM*0+ZZ] -= tz;
321
322             /**************************
323              * CALCULATE INTERACTIONS *
324              **************************/
325
326             r20              = rsq20*rinv20;
327
328             qq20             = iq2*jq0;
329
330             /* EWALD ELECTROSTATICS */
331
332             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
333             ewrt             = r20*ewtabscale;
334             ewitab           = ewrt;
335             eweps            = ewrt-ewitab;
336             ewitab           = 4*ewitab;
337             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
338             velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
339             felec            = qq20*rinv20*(rinvsq20-felec);
340
341             /* Update potential sums from outer loop */
342             velecsum        += velec;
343
344             fscal            = felec;
345
346             /* Calculate temporary vectorial force */
347             tx               = fscal*dx20;
348             ty               = fscal*dy20;
349             tz               = fscal*dz20;
350
351             /* Update vectorial force */
352             fix2            += tx;
353             fiy2            += ty;
354             fiz2            += tz;
355             f[j_coord_offset+DIM*0+XX] -= tx;
356             f[j_coord_offset+DIM*0+YY] -= ty;
357             f[j_coord_offset+DIM*0+ZZ] -= tz;
358
359             /**************************
360              * CALCULATE INTERACTIONS *
361              **************************/
362
363             r30              = rsq30*rinv30;
364
365             qq30             = iq3*jq0;
366
367             /* EWALD ELECTROSTATICS */
368
369             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
370             ewrt             = r30*ewtabscale;
371             ewitab           = ewrt;
372             eweps            = ewrt-ewitab;
373             ewitab           = 4*ewitab;
374             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
375             velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
376             felec            = qq30*rinv30*(rinvsq30-felec);
377
378             /* Update potential sums from outer loop */
379             velecsum        += velec;
380
381             fscal            = felec;
382
383             /* Calculate temporary vectorial force */
384             tx               = fscal*dx30;
385             ty               = fscal*dy30;
386             tz               = fscal*dz30;
387
388             /* Update vectorial force */
389             fix3            += tx;
390             fiy3            += ty;
391             fiz3            += tz;
392             f[j_coord_offset+DIM*0+XX] -= tx;
393             f[j_coord_offset+DIM*0+YY] -= ty;
394             f[j_coord_offset+DIM*0+ZZ] -= tz;
395
396             /* Inner loop uses 178 flops */
397         }
398         /* End of innermost loop */
399
400         tx = ty = tz = 0;
401         f[i_coord_offset+DIM*0+XX] += fix0;
402         f[i_coord_offset+DIM*0+YY] += fiy0;
403         f[i_coord_offset+DIM*0+ZZ] += fiz0;
404         tx                         += fix0;
405         ty                         += fiy0;
406         tz                         += fiz0;
407         f[i_coord_offset+DIM*1+XX] += fix1;
408         f[i_coord_offset+DIM*1+YY] += fiy1;
409         f[i_coord_offset+DIM*1+ZZ] += fiz1;
410         tx                         += fix1;
411         ty                         += fiy1;
412         tz                         += fiz1;
413         f[i_coord_offset+DIM*2+XX] += fix2;
414         f[i_coord_offset+DIM*2+YY] += fiy2;
415         f[i_coord_offset+DIM*2+ZZ] += fiz2;
416         tx                         += fix2;
417         ty                         += fiy2;
418         tz                         += fiz2;
419         f[i_coord_offset+DIM*3+XX] += fix3;
420         f[i_coord_offset+DIM*3+YY] += fiy3;
421         f[i_coord_offset+DIM*3+ZZ] += fiz3;
422         tx                         += fix3;
423         ty                         += fiy3;
424         tz                         += fiz3;
425         fshift[i_shift_offset+XX]  += tx;
426         fshift[i_shift_offset+YY]  += ty;
427         fshift[i_shift_offset+ZZ]  += tz;
428
429         ggid                        = gid[iidx];
430         /* Update potential energies */
431         kernel_data->energygrp_elec[ggid] += velecsum;
432         kernel_data->energygrp_vdw[ggid] += vvdwsum;
433
434         /* Increment number of inner iterations */
435         inneriter                  += j_index_end - j_index_start;
436
437         /* Outer loop uses 41 flops */
438     }
439
440     /* Increment number of outer iterations */
441     outeriter        += nri;
442
443     /* Update outer/inner flops */
444
445     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*178);
446 }
447 /*
448  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c
449  * Electrostatics interaction: Ewald
450  * VdW interaction:            CubicSplineTable
451  * Geometry:                   Water4-Particle
452  * Calculate force/pot:        Force
453  */
454 void
455 nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c
456                     (t_nblist                    * gmx_restrict       nlist,
457                      rvec                        * gmx_restrict          xx,
458                      rvec                        * gmx_restrict          ff,
459                      t_forcerec                  * gmx_restrict          fr,
460                      t_mdatoms                   * gmx_restrict     mdatoms,
461                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
462                      t_nrnb                      * gmx_restrict        nrnb)
463 {
464     int              i_shift_offset,i_coord_offset,j_coord_offset;
465     int              j_index_start,j_index_end;
466     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
467     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
468     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
469     real             *shiftvec,*fshift,*x,*f;
470     int              vdwioffset0;
471     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
472     int              vdwioffset1;
473     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
474     int              vdwioffset2;
475     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
476     int              vdwioffset3;
477     real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
478     int              vdwjidx0;
479     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
480     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
481     real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
482     real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
483     real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
484     real             velec,felec,velecsum,facel,crf,krf,krf2;
485     real             *charge;
486     int              nvdwtype;
487     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
488     int              *vdwtype;
489     real             *vdwparam;
490     int              vfitab;
491     real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
492     real             *vftab;
493     int              ewitab;
494     real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
495     real             *ewtab;
496
497     x                = xx[0];
498     f                = ff[0];
499
500     nri              = nlist->nri;
501     iinr             = nlist->iinr;
502     jindex           = nlist->jindex;
503     jjnr             = nlist->jjnr;
504     shiftidx         = nlist->shift;
505     gid              = nlist->gid;
506     shiftvec         = fr->shift_vec[0];
507     fshift           = fr->fshift[0];
508     facel            = fr->epsfac;
509     charge           = mdatoms->chargeA;
510     nvdwtype         = fr->ntype;
511     vdwparam         = fr->nbfp;
512     vdwtype          = mdatoms->typeA;
513
514     vftab            = kernel_data->table_vdw->data;
515     vftabscale       = kernel_data->table_vdw->scale;
516
517     sh_ewald         = fr->ic->sh_ewald;
518     ewtab            = fr->ic->tabq_coul_F;
519     ewtabscale       = fr->ic->tabq_scale;
520     ewtabhalfspace   = 0.5/ewtabscale;
521
522     /* Setup water-specific parameters */
523     inr              = nlist->iinr[0];
524     iq1              = facel*charge[inr+1];
525     iq2              = facel*charge[inr+2];
526     iq3              = facel*charge[inr+3];
527     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
528
529     outeriter        = 0;
530     inneriter        = 0;
531
532     /* Start outer loop over neighborlists */
533     for(iidx=0; iidx<nri; iidx++)
534     {
535         /* Load shift vector for this list */
536         i_shift_offset   = DIM*shiftidx[iidx];
537         shX              = shiftvec[i_shift_offset+XX];
538         shY              = shiftvec[i_shift_offset+YY];
539         shZ              = shiftvec[i_shift_offset+ZZ];
540
541         /* Load limits for loop over neighbors */
542         j_index_start    = jindex[iidx];
543         j_index_end      = jindex[iidx+1];
544
545         /* Get outer coordinate index */
546         inr              = iinr[iidx];
547         i_coord_offset   = DIM*inr;
548
549         /* Load i particle coords and add shift vector */
550         ix0              = shX + x[i_coord_offset+DIM*0+XX];
551         iy0              = shY + x[i_coord_offset+DIM*0+YY];
552         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
553         ix1              = shX + x[i_coord_offset+DIM*1+XX];
554         iy1              = shY + x[i_coord_offset+DIM*1+YY];
555         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
556         ix2              = shX + x[i_coord_offset+DIM*2+XX];
557         iy2              = shY + x[i_coord_offset+DIM*2+YY];
558         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
559         ix3              = shX + x[i_coord_offset+DIM*3+XX];
560         iy3              = shY + x[i_coord_offset+DIM*3+YY];
561         iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
562
563         fix0             = 0.0;
564         fiy0             = 0.0;
565         fiz0             = 0.0;
566         fix1             = 0.0;
567         fiy1             = 0.0;
568         fiz1             = 0.0;
569         fix2             = 0.0;
570         fiy2             = 0.0;
571         fiz2             = 0.0;
572         fix3             = 0.0;
573         fiy3             = 0.0;
574         fiz3             = 0.0;
575
576         /* Start inner kernel loop */
577         for(jidx=j_index_start; jidx<j_index_end; jidx++)
578         {
579             /* Get j neighbor index, and coordinate index */
580             jnr              = jjnr[jidx];
581             j_coord_offset   = DIM*jnr;
582
583             /* load j atom coordinates */
584             jx0              = x[j_coord_offset+DIM*0+XX];
585             jy0              = x[j_coord_offset+DIM*0+YY];
586             jz0              = x[j_coord_offset+DIM*0+ZZ];
587
588             /* Calculate displacement vector */
589             dx00             = ix0 - jx0;
590             dy00             = iy0 - jy0;
591             dz00             = iz0 - jz0;
592             dx10             = ix1 - jx0;
593             dy10             = iy1 - jy0;
594             dz10             = iz1 - jz0;
595             dx20             = ix2 - jx0;
596             dy20             = iy2 - jy0;
597             dz20             = iz2 - jz0;
598             dx30             = ix3 - jx0;
599             dy30             = iy3 - jy0;
600             dz30             = iz3 - jz0;
601
602             /* Calculate squared distance and things based on it */
603             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
604             rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
605             rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
606             rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
607
608             rinv00           = gmx_invsqrt(rsq00);
609             rinv10           = gmx_invsqrt(rsq10);
610             rinv20           = gmx_invsqrt(rsq20);
611             rinv30           = gmx_invsqrt(rsq30);
612
613             rinvsq10         = rinv10*rinv10;
614             rinvsq20         = rinv20*rinv20;
615             rinvsq30         = rinv30*rinv30;
616
617             /* Load parameters for j particles */
618             jq0              = charge[jnr+0];
619             vdwjidx0         = 2*vdwtype[jnr+0];
620
621             /**************************
622              * CALCULATE INTERACTIONS *
623              **************************/
624
625             r00              = rsq00*rinv00;
626
627             c6_00            = vdwparam[vdwioffset0+vdwjidx0];
628             c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
629
630             /* Calculate table index by multiplying r with table scale and truncate to integer */
631             rt               = r00*vftabscale;
632             vfitab           = rt;
633             vfeps            = rt-vfitab;
634             vfitab           = 2*4*vfitab;
635
636             /* CUBIC SPLINE TABLE DISPERSION */
637             vfitab          += 0;
638             F                = vftab[vfitab+1];
639             Geps             = vfeps*vftab[vfitab+2];
640             Heps2            = vfeps*vfeps*vftab[vfitab+3];
641             Fp               = F+Geps+Heps2;
642             FF               = Fp+Geps+2.0*Heps2;
643             fvdw6            = c6_00*FF;
644
645             /* CUBIC SPLINE TABLE REPULSION */
646             F                = vftab[vfitab+5];
647             Geps             = vfeps*vftab[vfitab+6];
648             Heps2            = vfeps*vfeps*vftab[vfitab+7];
649             Fp               = F+Geps+Heps2;
650             FF               = Fp+Geps+2.0*Heps2;
651             fvdw12           = c12_00*FF;
652             fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
653
654             fscal            = fvdw;
655
656             /* Calculate temporary vectorial force */
657             tx               = fscal*dx00;
658             ty               = fscal*dy00;
659             tz               = fscal*dz00;
660
661             /* Update vectorial force */
662             fix0            += tx;
663             fiy0            += ty;
664             fiz0            += tz;
665             f[j_coord_offset+DIM*0+XX] -= tx;
666             f[j_coord_offset+DIM*0+YY] -= ty;
667             f[j_coord_offset+DIM*0+ZZ] -= tz;
668
669             /**************************
670              * CALCULATE INTERACTIONS *
671              **************************/
672
673             r10              = rsq10*rinv10;
674
675             qq10             = iq1*jq0;
676
677             /* EWALD ELECTROSTATICS */
678
679             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
680             ewrt             = r10*ewtabscale;
681             ewitab           = ewrt;
682             eweps            = ewrt-ewitab;
683             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
684             felec            = qq10*rinv10*(rinvsq10-felec);
685
686             fscal            = felec;
687
688             /* Calculate temporary vectorial force */
689             tx               = fscal*dx10;
690             ty               = fscal*dy10;
691             tz               = fscal*dz10;
692
693             /* Update vectorial force */
694             fix1            += tx;
695             fiy1            += ty;
696             fiz1            += tz;
697             f[j_coord_offset+DIM*0+XX] -= tx;
698             f[j_coord_offset+DIM*0+YY] -= ty;
699             f[j_coord_offset+DIM*0+ZZ] -= tz;
700
701             /**************************
702              * CALCULATE INTERACTIONS *
703              **************************/
704
705             r20              = rsq20*rinv20;
706
707             qq20             = iq2*jq0;
708
709             /* EWALD ELECTROSTATICS */
710
711             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
712             ewrt             = r20*ewtabscale;
713             ewitab           = ewrt;
714             eweps            = ewrt-ewitab;
715             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
716             felec            = qq20*rinv20*(rinvsq20-felec);
717
718             fscal            = felec;
719
720             /* Calculate temporary vectorial force */
721             tx               = fscal*dx20;
722             ty               = fscal*dy20;
723             tz               = fscal*dz20;
724
725             /* Update vectorial force */
726             fix2            += tx;
727             fiy2            += ty;
728             fiz2            += tz;
729             f[j_coord_offset+DIM*0+XX] -= tx;
730             f[j_coord_offset+DIM*0+YY] -= ty;
731             f[j_coord_offset+DIM*0+ZZ] -= tz;
732
733             /**************************
734              * CALCULATE INTERACTIONS *
735              **************************/
736
737             r30              = rsq30*rinv30;
738
739             qq30             = iq3*jq0;
740
741             /* EWALD ELECTROSTATICS */
742
743             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
744             ewrt             = r30*ewtabscale;
745             ewitab           = ewrt;
746             eweps            = ewrt-ewitab;
747             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
748             felec            = qq30*rinv30*(rinvsq30-felec);
749
750             fscal            = felec;
751
752             /* Calculate temporary vectorial force */
753             tx               = fscal*dx30;
754             ty               = fscal*dy30;
755             tz               = fscal*dz30;
756
757             /* Update vectorial force */
758             fix3            += tx;
759             fiy3            += ty;
760             fiz3            += tz;
761             f[j_coord_offset+DIM*0+XX] -= tx;
762             f[j_coord_offset+DIM*0+YY] -= ty;
763             f[j_coord_offset+DIM*0+ZZ] -= tz;
764
765             /* Inner loop uses 149 flops */
766         }
767         /* End of innermost loop */
768
769         tx = ty = tz = 0;
770         f[i_coord_offset+DIM*0+XX] += fix0;
771         f[i_coord_offset+DIM*0+YY] += fiy0;
772         f[i_coord_offset+DIM*0+ZZ] += fiz0;
773         tx                         += fix0;
774         ty                         += fiy0;
775         tz                         += fiz0;
776         f[i_coord_offset+DIM*1+XX] += fix1;
777         f[i_coord_offset+DIM*1+YY] += fiy1;
778         f[i_coord_offset+DIM*1+ZZ] += fiz1;
779         tx                         += fix1;
780         ty                         += fiy1;
781         tz                         += fiz1;
782         f[i_coord_offset+DIM*2+XX] += fix2;
783         f[i_coord_offset+DIM*2+YY] += fiy2;
784         f[i_coord_offset+DIM*2+ZZ] += fiz2;
785         tx                         += fix2;
786         ty                         += fiy2;
787         tz                         += fiz2;
788         f[i_coord_offset+DIM*3+XX] += fix3;
789         f[i_coord_offset+DIM*3+YY] += fiy3;
790         f[i_coord_offset+DIM*3+ZZ] += fiz3;
791         tx                         += fix3;
792         ty                         += fiy3;
793         tz                         += fiz3;
794         fshift[i_shift_offset+XX]  += tx;
795         fshift[i_shift_offset+YY]  += ty;
796         fshift[i_shift_offset+ZZ]  += tz;
797
798         /* Increment number of inner iterations */
799         inneriter                  += j_index_end - j_index_start;
800
801         /* Outer loop uses 39 flops */
802     }
803
804     /* Increment number of outer iterations */
805     outeriter        += nri;
806
807     /* Update outer/inner flops */
808
809     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*149);
810 }