Remove no-inline-max-size and suppress remark
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_c / nb_kernel_ElecEw_VdwLJEw_GeomW4P1_c.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS c kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "vec.h"
47 #include "nrnb.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_c
51  * Electrostatics interaction: Ewald
52  * VdW interaction:            LJEwald
53  * Geometry:                   Water4-Particle
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_c
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     int              i_shift_offset,i_coord_offset,j_coord_offset;
67     int              j_index_start,j_index_end;
68     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
69     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
70     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
71     real             *shiftvec,*fshift,*x,*f;
72     int              vdwioffset0;
73     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
74     int              vdwioffset1;
75     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
76     int              vdwioffset2;
77     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
78     int              vdwioffset3;
79     real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
80     int              vdwjidx0;
81     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
82     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
83     real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
84     real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
85     real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
86     real             velec,felec,velecsum,facel,crf,krf,krf2;
87     real             *charge;
88     int              nvdwtype;
89     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
90     int              *vdwtype;
91     real             *vdwparam;
92     real             c6grid_00;
93     real             c6grid_10;
94     real             c6grid_20;
95     real             c6grid_30;
96     real             ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,sh_lj_ewald;
97     real             *vdwgridparam;
98     int              ewitab;
99     real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
100     real             *ewtab;
101
102     x                = xx[0];
103     f                = ff[0];
104
105     nri              = nlist->nri;
106     iinr             = nlist->iinr;
107     jindex           = nlist->jindex;
108     jjnr             = nlist->jjnr;
109     shiftidx         = nlist->shift;
110     gid              = nlist->gid;
111     shiftvec         = fr->shift_vec[0];
112     fshift           = fr->fshift[0];
113     facel            = fr->epsfac;
114     charge           = mdatoms->chargeA;
115     nvdwtype         = fr->ntype;
116     vdwparam         = fr->nbfp;
117     vdwtype          = mdatoms->typeA;
118     vdwgridparam     = fr->ljpme_c6grid;
119     ewclj            = fr->ewaldcoeff_lj;
120     sh_lj_ewald      = fr->ic->sh_lj_ewald;
121     ewclj2           = ewclj*ewclj;
122     ewclj6           = ewclj2*ewclj2*ewclj2;
123
124     sh_ewald         = fr->ic->sh_ewald;
125     ewtab            = fr->ic->tabq_coul_FDV0;
126     ewtabscale       = fr->ic->tabq_scale;
127     ewtabhalfspace   = 0.5/ewtabscale;
128
129     /* Setup water-specific parameters */
130     inr              = nlist->iinr[0];
131     iq1              = facel*charge[inr+1];
132     iq2              = facel*charge[inr+2];
133     iq3              = facel*charge[inr+3];
134     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
135
136     outeriter        = 0;
137     inneriter        = 0;
138
139     /* Start outer loop over neighborlists */
140     for(iidx=0; iidx<nri; iidx++)
141     {
142         /* Load shift vector for this list */
143         i_shift_offset   = DIM*shiftidx[iidx];
144         shX              = shiftvec[i_shift_offset+XX];
145         shY              = shiftvec[i_shift_offset+YY];
146         shZ              = shiftvec[i_shift_offset+ZZ];
147
148         /* Load limits for loop over neighbors */
149         j_index_start    = jindex[iidx];
150         j_index_end      = jindex[iidx+1];
151
152         /* Get outer coordinate index */
153         inr              = iinr[iidx];
154         i_coord_offset   = DIM*inr;
155
156         /* Load i particle coords and add shift vector */
157         ix0              = shX + x[i_coord_offset+DIM*0+XX];
158         iy0              = shY + x[i_coord_offset+DIM*0+YY];
159         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
160         ix1              = shX + x[i_coord_offset+DIM*1+XX];
161         iy1              = shY + x[i_coord_offset+DIM*1+YY];
162         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
163         ix2              = shX + x[i_coord_offset+DIM*2+XX];
164         iy2              = shY + x[i_coord_offset+DIM*2+YY];
165         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
166         ix3              = shX + x[i_coord_offset+DIM*3+XX];
167         iy3              = shY + x[i_coord_offset+DIM*3+YY];
168         iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
169
170         fix0             = 0.0;
171         fiy0             = 0.0;
172         fiz0             = 0.0;
173         fix1             = 0.0;
174         fiy1             = 0.0;
175         fiz1             = 0.0;
176         fix2             = 0.0;
177         fiy2             = 0.0;
178         fiz2             = 0.0;
179         fix3             = 0.0;
180         fiy3             = 0.0;
181         fiz3             = 0.0;
182
183         /* Reset potential sums */
184         velecsum         = 0.0;
185         vvdwsum          = 0.0;
186
187         /* Start inner kernel loop */
188         for(jidx=j_index_start; jidx<j_index_end; jidx++)
189         {
190             /* Get j neighbor index, and coordinate index */
191             jnr              = jjnr[jidx];
192             j_coord_offset   = DIM*jnr;
193
194             /* load j atom coordinates */
195             jx0              = x[j_coord_offset+DIM*0+XX];
196             jy0              = x[j_coord_offset+DIM*0+YY];
197             jz0              = x[j_coord_offset+DIM*0+ZZ];
198
199             /* Calculate displacement vector */
200             dx00             = ix0 - jx0;
201             dy00             = iy0 - jy0;
202             dz00             = iz0 - jz0;
203             dx10             = ix1 - jx0;
204             dy10             = iy1 - jy0;
205             dz10             = iz1 - jz0;
206             dx20             = ix2 - jx0;
207             dy20             = iy2 - jy0;
208             dz20             = iz2 - jz0;
209             dx30             = ix3 - jx0;
210             dy30             = iy3 - jy0;
211             dz30             = iz3 - jz0;
212
213             /* Calculate squared distance and things based on it */
214             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
215             rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
216             rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
217             rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
218
219             rinv00           = gmx_invsqrt(rsq00);
220             rinv10           = gmx_invsqrt(rsq10);
221             rinv20           = gmx_invsqrt(rsq20);
222             rinv30           = gmx_invsqrt(rsq30);
223
224             rinvsq00         = rinv00*rinv00;
225             rinvsq10         = rinv10*rinv10;
226             rinvsq20         = rinv20*rinv20;
227             rinvsq30         = rinv30*rinv30;
228
229             /* Load parameters for j particles */
230             jq0              = charge[jnr+0];
231             vdwjidx0         = 2*vdwtype[jnr+0];
232
233             /**************************
234              * CALCULATE INTERACTIONS *
235              **************************/
236
237             r00              = rsq00*rinv00;
238
239             c6_00            = vdwparam[vdwioffset0+vdwjidx0];
240             c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
241             c6grid_00        = vdwgridparam[vdwioffset0+vdwjidx0];
242
243             rinvsix          = rinvsq00*rinvsq00*rinvsq00;
244             ewcljrsq         = ewclj2*rsq00;
245             exponent         = exp(-ewcljrsq);
246             poly             = exponent*(1.0 + ewcljrsq + ewcljrsq*ewcljrsq*0.5);
247             vvdw6            = (c6_00-c6grid_00*(1.0-poly))*rinvsix;
248             vvdw12           = c12_00*rinvsix*rinvsix;
249             vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
250             fvdw             = (vvdw12 - vvdw6 - c6grid_00*(1.0/6.0)*exponent*ewclj6)*rinvsq00;
251
252             /* Update potential sums from outer loop */
253             vvdwsum         += vvdw;
254
255             fscal            = fvdw;
256
257             /* Calculate temporary vectorial force */
258             tx               = fscal*dx00;
259             ty               = fscal*dy00;
260             tz               = fscal*dz00;
261
262             /* Update vectorial force */
263             fix0            += tx;
264             fiy0            += ty;
265             fiz0            += tz;
266             f[j_coord_offset+DIM*0+XX] -= tx;
267             f[j_coord_offset+DIM*0+YY] -= ty;
268             f[j_coord_offset+DIM*0+ZZ] -= tz;
269
270             /**************************
271              * CALCULATE INTERACTIONS *
272              **************************/
273
274             r10              = rsq10*rinv10;
275
276             qq10             = iq1*jq0;
277
278             /* EWALD ELECTROSTATICS */
279
280             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
281             ewrt             = r10*ewtabscale;
282             ewitab           = ewrt;
283             eweps            = ewrt-ewitab;
284             ewitab           = 4*ewitab;
285             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
286             velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
287             felec            = qq10*rinv10*(rinvsq10-felec);
288
289             /* Update potential sums from outer loop */
290             velecsum        += velec;
291
292             fscal            = felec;
293
294             /* Calculate temporary vectorial force */
295             tx               = fscal*dx10;
296             ty               = fscal*dy10;
297             tz               = fscal*dz10;
298
299             /* Update vectorial force */
300             fix1            += tx;
301             fiy1            += ty;
302             fiz1            += tz;
303             f[j_coord_offset+DIM*0+XX] -= tx;
304             f[j_coord_offset+DIM*0+YY] -= ty;
305             f[j_coord_offset+DIM*0+ZZ] -= tz;
306
307             /**************************
308              * CALCULATE INTERACTIONS *
309              **************************/
310
311             r20              = rsq20*rinv20;
312
313             qq20             = iq2*jq0;
314
315             /* EWALD ELECTROSTATICS */
316
317             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
318             ewrt             = r20*ewtabscale;
319             ewitab           = ewrt;
320             eweps            = ewrt-ewitab;
321             ewitab           = 4*ewitab;
322             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
323             velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
324             felec            = qq20*rinv20*(rinvsq20-felec);
325
326             /* Update potential sums from outer loop */
327             velecsum        += velec;
328
329             fscal            = felec;
330
331             /* Calculate temporary vectorial force */
332             tx               = fscal*dx20;
333             ty               = fscal*dy20;
334             tz               = fscal*dz20;
335
336             /* Update vectorial force */
337             fix2            += tx;
338             fiy2            += ty;
339             fiz2            += tz;
340             f[j_coord_offset+DIM*0+XX] -= tx;
341             f[j_coord_offset+DIM*0+YY] -= ty;
342             f[j_coord_offset+DIM*0+ZZ] -= tz;
343
344             /**************************
345              * CALCULATE INTERACTIONS *
346              **************************/
347
348             r30              = rsq30*rinv30;
349
350             qq30             = iq3*jq0;
351
352             /* EWALD ELECTROSTATICS */
353
354             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
355             ewrt             = r30*ewtabscale;
356             ewitab           = ewrt;
357             eweps            = ewrt-ewitab;
358             ewitab           = 4*ewitab;
359             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
360             velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
361             felec            = qq30*rinv30*(rinvsq30-felec);
362
363             /* Update potential sums from outer loop */
364             velecsum        += velec;
365
366             fscal            = felec;
367
368             /* Calculate temporary vectorial force */
369             tx               = fscal*dx30;
370             ty               = fscal*dy30;
371             tz               = fscal*dz30;
372
373             /* Update vectorial force */
374             fix3            += tx;
375             fiy3            += ty;
376             fiz3            += tz;
377             f[j_coord_offset+DIM*0+XX] -= tx;
378             f[j_coord_offset+DIM*0+YY] -= ty;
379             f[j_coord_offset+DIM*0+ZZ] -= tz;
380
381             /* Inner loop uses 172 flops */
382         }
383         /* End of innermost loop */
384
385         tx = ty = tz = 0;
386         f[i_coord_offset+DIM*0+XX] += fix0;
387         f[i_coord_offset+DIM*0+YY] += fiy0;
388         f[i_coord_offset+DIM*0+ZZ] += fiz0;
389         tx                         += fix0;
390         ty                         += fiy0;
391         tz                         += fiz0;
392         f[i_coord_offset+DIM*1+XX] += fix1;
393         f[i_coord_offset+DIM*1+YY] += fiy1;
394         f[i_coord_offset+DIM*1+ZZ] += fiz1;
395         tx                         += fix1;
396         ty                         += fiy1;
397         tz                         += fiz1;
398         f[i_coord_offset+DIM*2+XX] += fix2;
399         f[i_coord_offset+DIM*2+YY] += fiy2;
400         f[i_coord_offset+DIM*2+ZZ] += fiz2;
401         tx                         += fix2;
402         ty                         += fiy2;
403         tz                         += fiz2;
404         f[i_coord_offset+DIM*3+XX] += fix3;
405         f[i_coord_offset+DIM*3+YY] += fiy3;
406         f[i_coord_offset+DIM*3+ZZ] += fiz3;
407         tx                         += fix3;
408         ty                         += fiy3;
409         tz                         += fiz3;
410         fshift[i_shift_offset+XX]  += tx;
411         fshift[i_shift_offset+YY]  += ty;
412         fshift[i_shift_offset+ZZ]  += tz;
413
414         ggid                        = gid[iidx];
415         /* Update potential energies */
416         kernel_data->energygrp_elec[ggid] += velecsum;
417         kernel_data->energygrp_vdw[ggid] += vvdwsum;
418
419         /* Increment number of inner iterations */
420         inneriter                  += j_index_end - j_index_start;
421
422         /* Outer loop uses 41 flops */
423     }
424
425     /* Increment number of outer iterations */
426     outeriter        += nri;
427
428     /* Update outer/inner flops */
429
430     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*172);
431 }
432 /*
433  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_c
434  * Electrostatics interaction: Ewald
435  * VdW interaction:            LJEwald
436  * Geometry:                   Water4-Particle
437  * Calculate force/pot:        Force
438  */
439 void
440 nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_c
441                     (t_nblist                    * gmx_restrict       nlist,
442                      rvec                        * gmx_restrict          xx,
443                      rvec                        * gmx_restrict          ff,
444                      t_forcerec                  * gmx_restrict          fr,
445                      t_mdatoms                   * gmx_restrict     mdatoms,
446                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
447                      t_nrnb                      * gmx_restrict        nrnb)
448 {
449     int              i_shift_offset,i_coord_offset,j_coord_offset;
450     int              j_index_start,j_index_end;
451     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
452     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
453     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
454     real             *shiftvec,*fshift,*x,*f;
455     int              vdwioffset0;
456     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
457     int              vdwioffset1;
458     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
459     int              vdwioffset2;
460     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
461     int              vdwioffset3;
462     real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
463     int              vdwjidx0;
464     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
465     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
466     real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
467     real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
468     real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
469     real             velec,felec,velecsum,facel,crf,krf,krf2;
470     real             *charge;
471     int              nvdwtype;
472     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
473     int              *vdwtype;
474     real             *vdwparam;
475     real             c6grid_00;
476     real             c6grid_10;
477     real             c6grid_20;
478     real             c6grid_30;
479     real             ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,sh_lj_ewald;
480     real             *vdwgridparam;
481     int              ewitab;
482     real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
483     real             *ewtab;
484
485     x                = xx[0];
486     f                = ff[0];
487
488     nri              = nlist->nri;
489     iinr             = nlist->iinr;
490     jindex           = nlist->jindex;
491     jjnr             = nlist->jjnr;
492     shiftidx         = nlist->shift;
493     gid              = nlist->gid;
494     shiftvec         = fr->shift_vec[0];
495     fshift           = fr->fshift[0];
496     facel            = fr->epsfac;
497     charge           = mdatoms->chargeA;
498     nvdwtype         = fr->ntype;
499     vdwparam         = fr->nbfp;
500     vdwtype          = mdatoms->typeA;
501     vdwgridparam     = fr->ljpme_c6grid;
502     ewclj            = fr->ewaldcoeff_lj;
503     sh_lj_ewald      = fr->ic->sh_lj_ewald;
504     ewclj2           = ewclj*ewclj;
505     ewclj6           = ewclj2*ewclj2*ewclj2;
506
507     sh_ewald         = fr->ic->sh_ewald;
508     ewtab            = fr->ic->tabq_coul_F;
509     ewtabscale       = fr->ic->tabq_scale;
510     ewtabhalfspace   = 0.5/ewtabscale;
511
512     /* Setup water-specific parameters */
513     inr              = nlist->iinr[0];
514     iq1              = facel*charge[inr+1];
515     iq2              = facel*charge[inr+2];
516     iq3              = facel*charge[inr+3];
517     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
518
519     outeriter        = 0;
520     inneriter        = 0;
521
522     /* Start outer loop over neighborlists */
523     for(iidx=0; iidx<nri; iidx++)
524     {
525         /* Load shift vector for this list */
526         i_shift_offset   = DIM*shiftidx[iidx];
527         shX              = shiftvec[i_shift_offset+XX];
528         shY              = shiftvec[i_shift_offset+YY];
529         shZ              = shiftvec[i_shift_offset+ZZ];
530
531         /* Load limits for loop over neighbors */
532         j_index_start    = jindex[iidx];
533         j_index_end      = jindex[iidx+1];
534
535         /* Get outer coordinate index */
536         inr              = iinr[iidx];
537         i_coord_offset   = DIM*inr;
538
539         /* Load i particle coords and add shift vector */
540         ix0              = shX + x[i_coord_offset+DIM*0+XX];
541         iy0              = shY + x[i_coord_offset+DIM*0+YY];
542         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
543         ix1              = shX + x[i_coord_offset+DIM*1+XX];
544         iy1              = shY + x[i_coord_offset+DIM*1+YY];
545         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
546         ix2              = shX + x[i_coord_offset+DIM*2+XX];
547         iy2              = shY + x[i_coord_offset+DIM*2+YY];
548         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
549         ix3              = shX + x[i_coord_offset+DIM*3+XX];
550         iy3              = shY + x[i_coord_offset+DIM*3+YY];
551         iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
552
553         fix0             = 0.0;
554         fiy0             = 0.0;
555         fiz0             = 0.0;
556         fix1             = 0.0;
557         fiy1             = 0.0;
558         fiz1             = 0.0;
559         fix2             = 0.0;
560         fiy2             = 0.0;
561         fiz2             = 0.0;
562         fix3             = 0.0;
563         fiy3             = 0.0;
564         fiz3             = 0.0;
565
566         /* Start inner kernel loop */
567         for(jidx=j_index_start; jidx<j_index_end; jidx++)
568         {
569             /* Get j neighbor index, and coordinate index */
570             jnr              = jjnr[jidx];
571             j_coord_offset   = DIM*jnr;
572
573             /* load j atom coordinates */
574             jx0              = x[j_coord_offset+DIM*0+XX];
575             jy0              = x[j_coord_offset+DIM*0+YY];
576             jz0              = x[j_coord_offset+DIM*0+ZZ];
577
578             /* Calculate displacement vector */
579             dx00             = ix0 - jx0;
580             dy00             = iy0 - jy0;
581             dz00             = iz0 - jz0;
582             dx10             = ix1 - jx0;
583             dy10             = iy1 - jy0;
584             dz10             = iz1 - jz0;
585             dx20             = ix2 - jx0;
586             dy20             = iy2 - jy0;
587             dz20             = iz2 - jz0;
588             dx30             = ix3 - jx0;
589             dy30             = iy3 - jy0;
590             dz30             = iz3 - jz0;
591
592             /* Calculate squared distance and things based on it */
593             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
594             rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
595             rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
596             rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
597
598             rinv00           = gmx_invsqrt(rsq00);
599             rinv10           = gmx_invsqrt(rsq10);
600             rinv20           = gmx_invsqrt(rsq20);
601             rinv30           = gmx_invsqrt(rsq30);
602
603             rinvsq00         = rinv00*rinv00;
604             rinvsq10         = rinv10*rinv10;
605             rinvsq20         = rinv20*rinv20;
606             rinvsq30         = rinv30*rinv30;
607
608             /* Load parameters for j particles */
609             jq0              = charge[jnr+0];
610             vdwjidx0         = 2*vdwtype[jnr+0];
611
612             /**************************
613              * CALCULATE INTERACTIONS *
614              **************************/
615
616             r00              = rsq00*rinv00;
617
618             c6_00            = vdwparam[vdwioffset0+vdwjidx0];
619             c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
620             c6grid_00        = vdwgridparam[vdwioffset0+vdwjidx0];
621
622             rinvsix          = rinvsq00*rinvsq00*rinvsq00;
623             ewcljrsq         = ewclj2*rsq00;
624             exponent         = exp(-ewcljrsq);
625             poly             = exponent*(1.0 + ewcljrsq + ewcljrsq*ewcljrsq*0.5);
626             fvdw             = (((c12_00*rinvsix - c6_00 + c6grid_00*(1.0-poly))*rinvsix) - c6grid_00*(1.0/6.0)*exponent*ewclj6)*rinvsq00;
627
628             fscal            = fvdw;
629
630             /* Calculate temporary vectorial force */
631             tx               = fscal*dx00;
632             ty               = fscal*dy00;
633             tz               = fscal*dz00;
634
635             /* Update vectorial force */
636             fix0            += tx;
637             fiy0            += ty;
638             fiz0            += tz;
639             f[j_coord_offset+DIM*0+XX] -= tx;
640             f[j_coord_offset+DIM*0+YY] -= ty;
641             f[j_coord_offset+DIM*0+ZZ] -= tz;
642
643             /**************************
644              * CALCULATE INTERACTIONS *
645              **************************/
646
647             r10              = rsq10*rinv10;
648
649             qq10             = iq1*jq0;
650
651             /* EWALD ELECTROSTATICS */
652
653             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
654             ewrt             = r10*ewtabscale;
655             ewitab           = ewrt;
656             eweps            = ewrt-ewitab;
657             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
658             felec            = qq10*rinv10*(rinvsq10-felec);
659
660             fscal            = felec;
661
662             /* Calculate temporary vectorial force */
663             tx               = fscal*dx10;
664             ty               = fscal*dy10;
665             tz               = fscal*dz10;
666
667             /* Update vectorial force */
668             fix1            += tx;
669             fiy1            += ty;
670             fiz1            += tz;
671             f[j_coord_offset+DIM*0+XX] -= tx;
672             f[j_coord_offset+DIM*0+YY] -= ty;
673             f[j_coord_offset+DIM*0+ZZ] -= tz;
674
675             /**************************
676              * CALCULATE INTERACTIONS *
677              **************************/
678
679             r20              = rsq20*rinv20;
680
681             qq20             = iq2*jq0;
682
683             /* EWALD ELECTROSTATICS */
684
685             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
686             ewrt             = r20*ewtabscale;
687             ewitab           = ewrt;
688             eweps            = ewrt-ewitab;
689             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
690             felec            = qq20*rinv20*(rinvsq20-felec);
691
692             fscal            = felec;
693
694             /* Calculate temporary vectorial force */
695             tx               = fscal*dx20;
696             ty               = fscal*dy20;
697             tz               = fscal*dz20;
698
699             /* Update vectorial force */
700             fix2            += tx;
701             fiy2            += ty;
702             fiz2            += tz;
703             f[j_coord_offset+DIM*0+XX] -= tx;
704             f[j_coord_offset+DIM*0+YY] -= ty;
705             f[j_coord_offset+DIM*0+ZZ] -= tz;
706
707             /**************************
708              * CALCULATE INTERACTIONS *
709              **************************/
710
711             r30              = rsq30*rinv30;
712
713             qq30             = iq3*jq0;
714
715             /* EWALD ELECTROSTATICS */
716
717             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
718             ewrt             = r30*ewtabscale;
719             ewitab           = ewrt;
720             eweps            = ewrt-ewitab;
721             felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
722             felec            = qq30*rinv30*(rinvsq30-felec);
723
724             fscal            = felec;
725
726             /* Calculate temporary vectorial force */
727             tx               = fscal*dx30;
728             ty               = fscal*dy30;
729             tz               = fscal*dz30;
730
731             /* Update vectorial force */
732             fix3            += tx;
733             fiy3            += ty;
734             fiz3            += tz;
735             f[j_coord_offset+DIM*0+XX] -= tx;
736             f[j_coord_offset+DIM*0+YY] -= ty;
737             f[j_coord_offset+DIM*0+ZZ] -= tz;
738
739             /* Inner loop uses 146 flops */
740         }
741         /* End of innermost loop */
742
743         tx = ty = tz = 0;
744         f[i_coord_offset+DIM*0+XX] += fix0;
745         f[i_coord_offset+DIM*0+YY] += fiy0;
746         f[i_coord_offset+DIM*0+ZZ] += fiz0;
747         tx                         += fix0;
748         ty                         += fiy0;
749         tz                         += fiz0;
750         f[i_coord_offset+DIM*1+XX] += fix1;
751         f[i_coord_offset+DIM*1+YY] += fiy1;
752         f[i_coord_offset+DIM*1+ZZ] += fiz1;
753         tx                         += fix1;
754         ty                         += fiy1;
755         tz                         += fiz1;
756         f[i_coord_offset+DIM*2+XX] += fix2;
757         f[i_coord_offset+DIM*2+YY] += fiy2;
758         f[i_coord_offset+DIM*2+ZZ] += fiz2;
759         tx                         += fix2;
760         ty                         += fiy2;
761         tz                         += fiz2;
762         f[i_coord_offset+DIM*3+XX] += fix3;
763         f[i_coord_offset+DIM*3+YY] += fiy3;
764         f[i_coord_offset+DIM*3+ZZ] += fiz3;
765         tx                         += fix3;
766         ty                         += fiy3;
767         tz                         += fiz3;
768         fshift[i_shift_offset+XX]  += tx;
769         fshift[i_shift_offset+YY]  += ty;
770         fshift[i_shift_offset+ZZ]  += tz;
771
772         /* Increment number of inner iterations */
773         inneriter                  += j_index_end - j_index_start;
774
775         /* Outer loop uses 39 flops */
776     }
777
778     /* Increment number of outer iterations */
779     outeriter        += nri;
780
781     /* Update outer/inner flops */
782
783     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*146);
784 }