65afc9e980e907986081f75d321dcec44c9fcbc2
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecEw_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "vec.h"
47 #include "nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: Ewald
54  * VdW interaction:            CubicSplineTable
55  * Geometry:                   Water4-Water4
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwioffset3;
88     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
89     int              vdwjidx0A,vdwjidx0B;
90     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
91     int              vdwjidx1A,vdwjidx1B;
92     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
93     int              vdwjidx2A,vdwjidx2B;
94     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
95     int              vdwjidx3A,vdwjidx3B;
96     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
97     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
98     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
99     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
100     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
101     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
102     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
103     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
104     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
105     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
106     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
107     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
108     real             *charge;
109     int              nvdwtype;
110     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
111     int              *vdwtype;
112     real             *vdwparam;
113     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
114     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
115     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
116     real             *vftab;
117     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
118     real             *ewtab;
119     _fjsp_v2r8       itab_tmp;
120     _fjsp_v2r8       dummy_mask,cutoff_mask;
121     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
122     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
123     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
124
125     x                = xx[0];
126     f                = ff[0];
127
128     nri              = nlist->nri;
129     iinr             = nlist->iinr;
130     jindex           = nlist->jindex;
131     jjnr             = nlist->jjnr;
132     shiftidx         = nlist->shift;
133     gid              = nlist->gid;
134     shiftvec         = fr->shift_vec[0];
135     fshift           = fr->fshift[0];
136     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
137     charge           = mdatoms->chargeA;
138     nvdwtype         = fr->ntype;
139     vdwparam         = fr->nbfp;
140     vdwtype          = mdatoms->typeA;
141
142     vftab            = kernel_data->table_vdw->data;
143     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
144
145     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
146     ewtab            = fr->ic->tabq_coul_FDV0;
147     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
148     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
149
150     /* Setup water-specific parameters */
151     inr              = nlist->iinr[0];
152     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
153     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
154     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
155     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
156
157     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
158     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
159     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
160     vdwjidx0A        = 2*vdwtype[inr+0];
161     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
162     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
163     qq11             = _fjsp_mul_v2r8(iq1,jq1);
164     qq12             = _fjsp_mul_v2r8(iq1,jq2);
165     qq13             = _fjsp_mul_v2r8(iq1,jq3);
166     qq21             = _fjsp_mul_v2r8(iq2,jq1);
167     qq22             = _fjsp_mul_v2r8(iq2,jq2);
168     qq23             = _fjsp_mul_v2r8(iq2,jq3);
169     qq31             = _fjsp_mul_v2r8(iq3,jq1);
170     qq32             = _fjsp_mul_v2r8(iq3,jq2);
171     qq33             = _fjsp_mul_v2r8(iq3,jq3);
172
173     /* Avoid stupid compiler warnings */
174     jnrA = jnrB = 0;
175     j_coord_offsetA = 0;
176     j_coord_offsetB = 0;
177
178     outeriter        = 0;
179     inneriter        = 0;
180
181     /* Start outer loop over neighborlists */
182     for(iidx=0; iidx<nri; iidx++)
183     {
184         /* Load shift vector for this list */
185         i_shift_offset   = DIM*shiftidx[iidx];
186
187         /* Load limits for loop over neighbors */
188         j_index_start    = jindex[iidx];
189         j_index_end      = jindex[iidx+1];
190
191         /* Get outer coordinate index */
192         inr              = iinr[iidx];
193         i_coord_offset   = DIM*inr;
194
195         /* Load i particle coords and add shift vector */
196         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
197                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
198
199         fix0             = _fjsp_setzero_v2r8();
200         fiy0             = _fjsp_setzero_v2r8();
201         fiz0             = _fjsp_setzero_v2r8();
202         fix1             = _fjsp_setzero_v2r8();
203         fiy1             = _fjsp_setzero_v2r8();
204         fiz1             = _fjsp_setzero_v2r8();
205         fix2             = _fjsp_setzero_v2r8();
206         fiy2             = _fjsp_setzero_v2r8();
207         fiz2             = _fjsp_setzero_v2r8();
208         fix3             = _fjsp_setzero_v2r8();
209         fiy3             = _fjsp_setzero_v2r8();
210         fiz3             = _fjsp_setzero_v2r8();
211
212         /* Reset potential sums */
213         velecsum         = _fjsp_setzero_v2r8();
214         vvdwsum          = _fjsp_setzero_v2r8();
215
216         /* Start inner kernel loop */
217         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
218         {
219
220             /* Get j neighbor index, and coordinate index */
221             jnrA             = jjnr[jidx];
222             jnrB             = jjnr[jidx+1];
223             j_coord_offsetA  = DIM*jnrA;
224             j_coord_offsetB  = DIM*jnrB;
225
226             /* load j atom coordinates */
227             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
228                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
229                                               &jy2,&jz2,&jx3,&jy3,&jz3);
230
231             /* Calculate displacement vector */
232             dx00             = _fjsp_sub_v2r8(ix0,jx0);
233             dy00             = _fjsp_sub_v2r8(iy0,jy0);
234             dz00             = _fjsp_sub_v2r8(iz0,jz0);
235             dx11             = _fjsp_sub_v2r8(ix1,jx1);
236             dy11             = _fjsp_sub_v2r8(iy1,jy1);
237             dz11             = _fjsp_sub_v2r8(iz1,jz1);
238             dx12             = _fjsp_sub_v2r8(ix1,jx2);
239             dy12             = _fjsp_sub_v2r8(iy1,jy2);
240             dz12             = _fjsp_sub_v2r8(iz1,jz2);
241             dx13             = _fjsp_sub_v2r8(ix1,jx3);
242             dy13             = _fjsp_sub_v2r8(iy1,jy3);
243             dz13             = _fjsp_sub_v2r8(iz1,jz3);
244             dx21             = _fjsp_sub_v2r8(ix2,jx1);
245             dy21             = _fjsp_sub_v2r8(iy2,jy1);
246             dz21             = _fjsp_sub_v2r8(iz2,jz1);
247             dx22             = _fjsp_sub_v2r8(ix2,jx2);
248             dy22             = _fjsp_sub_v2r8(iy2,jy2);
249             dz22             = _fjsp_sub_v2r8(iz2,jz2);
250             dx23             = _fjsp_sub_v2r8(ix2,jx3);
251             dy23             = _fjsp_sub_v2r8(iy2,jy3);
252             dz23             = _fjsp_sub_v2r8(iz2,jz3);
253             dx31             = _fjsp_sub_v2r8(ix3,jx1);
254             dy31             = _fjsp_sub_v2r8(iy3,jy1);
255             dz31             = _fjsp_sub_v2r8(iz3,jz1);
256             dx32             = _fjsp_sub_v2r8(ix3,jx2);
257             dy32             = _fjsp_sub_v2r8(iy3,jy2);
258             dz32             = _fjsp_sub_v2r8(iz3,jz2);
259             dx33             = _fjsp_sub_v2r8(ix3,jx3);
260             dy33             = _fjsp_sub_v2r8(iy3,jy3);
261             dz33             = _fjsp_sub_v2r8(iz3,jz3);
262
263             /* Calculate squared distance and things based on it */
264             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
265             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
266             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
267             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
268             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
269             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
270             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
271             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
272             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
273             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
274
275             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
276             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
277             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
278             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
279             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
280             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
281             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
282             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
283             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
284             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
285
286             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
287             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
288             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
289             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
290             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
291             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
292             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
293             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
294             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
295
296             fjx0             = _fjsp_setzero_v2r8();
297             fjy0             = _fjsp_setzero_v2r8();
298             fjz0             = _fjsp_setzero_v2r8();
299             fjx1             = _fjsp_setzero_v2r8();
300             fjy1             = _fjsp_setzero_v2r8();
301             fjz1             = _fjsp_setzero_v2r8();
302             fjx2             = _fjsp_setzero_v2r8();
303             fjy2             = _fjsp_setzero_v2r8();
304             fjz2             = _fjsp_setzero_v2r8();
305             fjx3             = _fjsp_setzero_v2r8();
306             fjy3             = _fjsp_setzero_v2r8();
307             fjz3             = _fjsp_setzero_v2r8();
308
309             /**************************
310              * CALCULATE INTERACTIONS *
311              **************************/
312
313             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
314
315             /* Calculate table index by multiplying r with table scale and truncate to integer */
316             rt               = _fjsp_mul_v2r8(r00,vftabscale);
317             itab_tmp         = _fjsp_dtox_v2r8(rt);
318             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
319             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
320             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
321
322             vfconv.i[0]     *= 8;
323             vfconv.i[1]     *= 8;
324
325             /* CUBIC SPLINE TABLE DISPERSION */
326             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
327             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
328             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
329             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
330             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
331             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
332             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
333             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
334             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
335             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
336             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
337
338             /* CUBIC SPLINE TABLE REPULSION */
339             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
340             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
341             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
342             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
343             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
344             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
345             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
346             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
347             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
348             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
349             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
350             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
351             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
352
353             /* Update potential sum for this i atom from the interaction with this j atom. */
354             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
355
356             fscal            = fvdw;
357
358             /* Update vectorial force */
359             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
360             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
361             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
362             
363             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
364             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
365             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
366
367             /**************************
368              * CALCULATE INTERACTIONS *
369              **************************/
370
371             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
372
373             /* EWALD ELECTROSTATICS */
374
375             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
376             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
377             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
378             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
379             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
380
381             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
382             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
383             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
384             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
385             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
386             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
387             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
388             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
389             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
390             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
391
392             /* Update potential sum for this i atom from the interaction with this j atom. */
393             velecsum         = _fjsp_add_v2r8(velecsum,velec);
394
395             fscal            = felec;
396
397             /* Update vectorial force */
398             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
399             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
400             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
401             
402             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
403             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
404             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
405
406             /**************************
407              * CALCULATE INTERACTIONS *
408              **************************/
409
410             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
411
412             /* EWALD ELECTROSTATICS */
413
414             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
415             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
416             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
417             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
418             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
419
420             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
421             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
422             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
423             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
424             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
425             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
426             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
427             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
428             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
429             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
430
431             /* Update potential sum for this i atom from the interaction with this j atom. */
432             velecsum         = _fjsp_add_v2r8(velecsum,velec);
433
434             fscal            = felec;
435
436             /* Update vectorial force */
437             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
438             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
439             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
440             
441             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
442             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
443             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
444
445             /**************************
446              * CALCULATE INTERACTIONS *
447              **************************/
448
449             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
450
451             /* EWALD ELECTROSTATICS */
452
453             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
454             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
455             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
456             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
457             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
458
459             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
460             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
461             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
462             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
463             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
464             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
465             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
466             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
467             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
468             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
469
470             /* Update potential sum for this i atom from the interaction with this j atom. */
471             velecsum         = _fjsp_add_v2r8(velecsum,velec);
472
473             fscal            = felec;
474
475             /* Update vectorial force */
476             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
477             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
478             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
479             
480             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
481             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
482             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
483
484             /**************************
485              * CALCULATE INTERACTIONS *
486              **************************/
487
488             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
489
490             /* EWALD ELECTROSTATICS */
491
492             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
493             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
494             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
495             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
496             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
497
498             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
499             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
500             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
501             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
502             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
503             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
504             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
505             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
506             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
507             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
508
509             /* Update potential sum for this i atom from the interaction with this j atom. */
510             velecsum         = _fjsp_add_v2r8(velecsum,velec);
511
512             fscal            = felec;
513
514             /* Update vectorial force */
515             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
516             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
517             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
518             
519             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
520             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
521             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
522
523             /**************************
524              * CALCULATE INTERACTIONS *
525              **************************/
526
527             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
528
529             /* EWALD ELECTROSTATICS */
530
531             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
532             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
533             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
534             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
535             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
536
537             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
538             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
539             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
540             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
541             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
542             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
543             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
544             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
545             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
546             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
547
548             /* Update potential sum for this i atom from the interaction with this j atom. */
549             velecsum         = _fjsp_add_v2r8(velecsum,velec);
550
551             fscal            = felec;
552
553             /* Update vectorial force */
554             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
555             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
556             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
557             
558             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
559             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
560             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
561
562             /**************************
563              * CALCULATE INTERACTIONS *
564              **************************/
565
566             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
567
568             /* EWALD ELECTROSTATICS */
569
570             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
571             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
572             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
573             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
574             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
575
576             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
577             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
578             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
579             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
580             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
581             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
582             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
583             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
584             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
585             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
586
587             /* Update potential sum for this i atom from the interaction with this j atom. */
588             velecsum         = _fjsp_add_v2r8(velecsum,velec);
589
590             fscal            = felec;
591
592             /* Update vectorial force */
593             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
594             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
595             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
596             
597             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
598             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
599             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
600
601             /**************************
602              * CALCULATE INTERACTIONS *
603              **************************/
604
605             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
606
607             /* EWALD ELECTROSTATICS */
608
609             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
610             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
611             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
612             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
613             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
614
615             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
616             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
617             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
618             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
619             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
620             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
621             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
622             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
623             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
624             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
625
626             /* Update potential sum for this i atom from the interaction with this j atom. */
627             velecsum         = _fjsp_add_v2r8(velecsum,velec);
628
629             fscal            = felec;
630
631             /* Update vectorial force */
632             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
633             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
634             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
635             
636             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
637             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
638             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
639
640             /**************************
641              * CALCULATE INTERACTIONS *
642              **************************/
643
644             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
645
646             /* EWALD ELECTROSTATICS */
647
648             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
649             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
650             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
651             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
652             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
653
654             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
655             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
656             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
657             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
658             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
659             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
660             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
661             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
662             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
663             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
664
665             /* Update potential sum for this i atom from the interaction with this j atom. */
666             velecsum         = _fjsp_add_v2r8(velecsum,velec);
667
668             fscal            = felec;
669
670             /* Update vectorial force */
671             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
672             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
673             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
674             
675             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
676             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
677             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
678
679             /**************************
680              * CALCULATE INTERACTIONS *
681              **************************/
682
683             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
684
685             /* EWALD ELECTROSTATICS */
686
687             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
688             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
689             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
690             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
691             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
692
693             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
694             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
695             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
696             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
697             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
698             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
699             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
700             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
701             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
702             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
703
704             /* Update potential sum for this i atom from the interaction with this j atom. */
705             velecsum         = _fjsp_add_v2r8(velecsum,velec);
706
707             fscal            = felec;
708
709             /* Update vectorial force */
710             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
711             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
712             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
713             
714             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
715             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
716             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
717
718             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
719
720             /* Inner loop uses 458 flops */
721         }
722
723         if(jidx<j_index_end)
724         {
725
726             jnrA             = jjnr[jidx];
727             j_coord_offsetA  = DIM*jnrA;
728
729             /* load j atom coordinates */
730             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
731                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
732                                               &jy2,&jz2,&jx3,&jy3,&jz3);
733
734             /* Calculate displacement vector */
735             dx00             = _fjsp_sub_v2r8(ix0,jx0);
736             dy00             = _fjsp_sub_v2r8(iy0,jy0);
737             dz00             = _fjsp_sub_v2r8(iz0,jz0);
738             dx11             = _fjsp_sub_v2r8(ix1,jx1);
739             dy11             = _fjsp_sub_v2r8(iy1,jy1);
740             dz11             = _fjsp_sub_v2r8(iz1,jz1);
741             dx12             = _fjsp_sub_v2r8(ix1,jx2);
742             dy12             = _fjsp_sub_v2r8(iy1,jy2);
743             dz12             = _fjsp_sub_v2r8(iz1,jz2);
744             dx13             = _fjsp_sub_v2r8(ix1,jx3);
745             dy13             = _fjsp_sub_v2r8(iy1,jy3);
746             dz13             = _fjsp_sub_v2r8(iz1,jz3);
747             dx21             = _fjsp_sub_v2r8(ix2,jx1);
748             dy21             = _fjsp_sub_v2r8(iy2,jy1);
749             dz21             = _fjsp_sub_v2r8(iz2,jz1);
750             dx22             = _fjsp_sub_v2r8(ix2,jx2);
751             dy22             = _fjsp_sub_v2r8(iy2,jy2);
752             dz22             = _fjsp_sub_v2r8(iz2,jz2);
753             dx23             = _fjsp_sub_v2r8(ix2,jx3);
754             dy23             = _fjsp_sub_v2r8(iy2,jy3);
755             dz23             = _fjsp_sub_v2r8(iz2,jz3);
756             dx31             = _fjsp_sub_v2r8(ix3,jx1);
757             dy31             = _fjsp_sub_v2r8(iy3,jy1);
758             dz31             = _fjsp_sub_v2r8(iz3,jz1);
759             dx32             = _fjsp_sub_v2r8(ix3,jx2);
760             dy32             = _fjsp_sub_v2r8(iy3,jy2);
761             dz32             = _fjsp_sub_v2r8(iz3,jz2);
762             dx33             = _fjsp_sub_v2r8(ix3,jx3);
763             dy33             = _fjsp_sub_v2r8(iy3,jy3);
764             dz33             = _fjsp_sub_v2r8(iz3,jz3);
765
766             /* Calculate squared distance and things based on it */
767             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
768             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
769             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
770             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
771             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
772             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
773             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
774             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
775             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
776             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
777
778             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
779             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
780             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
781             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
782             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
783             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
784             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
785             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
786             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
787             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
788
789             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
790             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
791             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
792             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
793             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
794             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
795             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
796             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
797             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
798
799             fjx0             = _fjsp_setzero_v2r8();
800             fjy0             = _fjsp_setzero_v2r8();
801             fjz0             = _fjsp_setzero_v2r8();
802             fjx1             = _fjsp_setzero_v2r8();
803             fjy1             = _fjsp_setzero_v2r8();
804             fjz1             = _fjsp_setzero_v2r8();
805             fjx2             = _fjsp_setzero_v2r8();
806             fjy2             = _fjsp_setzero_v2r8();
807             fjz2             = _fjsp_setzero_v2r8();
808             fjx3             = _fjsp_setzero_v2r8();
809             fjy3             = _fjsp_setzero_v2r8();
810             fjz3             = _fjsp_setzero_v2r8();
811
812             /**************************
813              * CALCULATE INTERACTIONS *
814              **************************/
815
816             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
817
818             /* Calculate table index by multiplying r with table scale and truncate to integer */
819             rt               = _fjsp_mul_v2r8(r00,vftabscale);
820             itab_tmp         = _fjsp_dtox_v2r8(rt);
821             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
822             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
823             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
824
825             vfconv.i[0]     *= 8;
826             vfconv.i[1]     *= 8;
827
828             /* CUBIC SPLINE TABLE DISPERSION */
829             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
830             F                = _fjsp_setzero_v2r8();
831             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
832             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
833             H                = _fjsp_setzero_v2r8();
834             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
835             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
836             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
837             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
838             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
839             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
840
841             /* CUBIC SPLINE TABLE REPULSION */
842             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
843             F                = _fjsp_setzero_v2r8();
844             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
845             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
846             H                = _fjsp_setzero_v2r8();
847             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
848             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
849             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
850             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
851             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
852             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
853             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
854             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
855
856             /* Update potential sum for this i atom from the interaction with this j atom. */
857             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
858             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
859
860             fscal            = fvdw;
861
862             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
863
864             /* Update vectorial force */
865             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
866             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
867             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
868             
869             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
870             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
871             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
872
873             /**************************
874              * CALCULATE INTERACTIONS *
875              **************************/
876
877             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
878
879             /* EWALD ELECTROSTATICS */
880
881             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
882             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
883             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
884             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
885             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
886
887             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
888             ewtabD           = _fjsp_setzero_v2r8();
889             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
890             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
891             ewtabFn          = _fjsp_setzero_v2r8();
892             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
893             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
894             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
895             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
896             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
897
898             /* Update potential sum for this i atom from the interaction with this j atom. */
899             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
900             velecsum         = _fjsp_add_v2r8(velecsum,velec);
901
902             fscal            = felec;
903
904             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
905
906             /* Update vectorial force */
907             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
908             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
909             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
910             
911             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
912             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
913             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
914
915             /**************************
916              * CALCULATE INTERACTIONS *
917              **************************/
918
919             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
920
921             /* EWALD ELECTROSTATICS */
922
923             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
924             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
925             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
926             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
927             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
928
929             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
930             ewtabD           = _fjsp_setzero_v2r8();
931             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
932             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
933             ewtabFn          = _fjsp_setzero_v2r8();
934             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
935             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
936             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
937             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
938             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
939
940             /* Update potential sum for this i atom from the interaction with this j atom. */
941             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
942             velecsum         = _fjsp_add_v2r8(velecsum,velec);
943
944             fscal            = felec;
945
946             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
947
948             /* Update vectorial force */
949             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
950             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
951             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
952             
953             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
954             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
955             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
956
957             /**************************
958              * CALCULATE INTERACTIONS *
959              **************************/
960
961             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
962
963             /* EWALD ELECTROSTATICS */
964
965             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
966             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
967             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
968             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
969             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
970
971             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
972             ewtabD           = _fjsp_setzero_v2r8();
973             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
974             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
975             ewtabFn          = _fjsp_setzero_v2r8();
976             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
977             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
978             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
979             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
980             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
981
982             /* Update potential sum for this i atom from the interaction with this j atom. */
983             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
984             velecsum         = _fjsp_add_v2r8(velecsum,velec);
985
986             fscal            = felec;
987
988             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
989
990             /* Update vectorial force */
991             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
992             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
993             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
994             
995             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
996             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
997             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
998
999             /**************************
1000              * CALCULATE INTERACTIONS *
1001              **************************/
1002
1003             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1004
1005             /* EWALD ELECTROSTATICS */
1006
1007             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1008             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1009             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1010             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1011             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1012
1013             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1014             ewtabD           = _fjsp_setzero_v2r8();
1015             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1016             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1017             ewtabFn          = _fjsp_setzero_v2r8();
1018             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1019             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1020             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1021             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
1022             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1023
1024             /* Update potential sum for this i atom from the interaction with this j atom. */
1025             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1026             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1027
1028             fscal            = felec;
1029
1030             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1031
1032             /* Update vectorial force */
1033             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1034             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1035             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1036             
1037             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1038             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1039             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1040
1041             /**************************
1042              * CALCULATE INTERACTIONS *
1043              **************************/
1044
1045             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1046
1047             /* EWALD ELECTROSTATICS */
1048
1049             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1050             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1051             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1052             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1053             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1054
1055             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1056             ewtabD           = _fjsp_setzero_v2r8();
1057             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1058             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1059             ewtabFn          = _fjsp_setzero_v2r8();
1060             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1061             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1062             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1063             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
1064             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1065
1066             /* Update potential sum for this i atom from the interaction with this j atom. */
1067             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1068             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1069
1070             fscal            = felec;
1071
1072             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1073
1074             /* Update vectorial force */
1075             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1076             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1077             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1078             
1079             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1080             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1081             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1082
1083             /**************************
1084              * CALCULATE INTERACTIONS *
1085              **************************/
1086
1087             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1088
1089             /* EWALD ELECTROSTATICS */
1090
1091             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1092             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
1093             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1094             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1095             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1096
1097             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1098             ewtabD           = _fjsp_setzero_v2r8();
1099             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1100             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1101             ewtabFn          = _fjsp_setzero_v2r8();
1102             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1103             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1104             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1105             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
1106             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
1107
1108             /* Update potential sum for this i atom from the interaction with this j atom. */
1109             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1110             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1111
1112             fscal            = felec;
1113
1114             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1115
1116             /* Update vectorial force */
1117             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1118             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1119             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1120             
1121             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1122             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1123             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1124
1125             /**************************
1126              * CALCULATE INTERACTIONS *
1127              **************************/
1128
1129             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1130
1131             /* EWALD ELECTROSTATICS */
1132
1133             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1134             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
1135             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1136             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1137             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1138
1139             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1140             ewtabD           = _fjsp_setzero_v2r8();
1141             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1142             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1143             ewtabFn          = _fjsp_setzero_v2r8();
1144             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1145             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1146             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1147             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
1148             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
1149
1150             /* Update potential sum for this i atom from the interaction with this j atom. */
1151             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1152             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1153
1154             fscal            = felec;
1155
1156             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1157
1158             /* Update vectorial force */
1159             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1160             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1161             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1162             
1163             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1164             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1165             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1166
1167             /**************************
1168              * CALCULATE INTERACTIONS *
1169              **************************/
1170
1171             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1172
1173             /* EWALD ELECTROSTATICS */
1174
1175             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1176             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
1177             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1178             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1179             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1180
1181             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1182             ewtabD           = _fjsp_setzero_v2r8();
1183             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1184             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1185             ewtabFn          = _fjsp_setzero_v2r8();
1186             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1187             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1188             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1189             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
1190             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
1191
1192             /* Update potential sum for this i atom from the interaction with this j atom. */
1193             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1194             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1195
1196             fscal            = felec;
1197
1198             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1199
1200             /* Update vectorial force */
1201             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1202             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1203             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1204             
1205             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1206             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1207             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1208
1209             /**************************
1210              * CALCULATE INTERACTIONS *
1211              **************************/
1212
1213             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1214
1215             /* EWALD ELECTROSTATICS */
1216
1217             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1218             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
1219             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1220             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1221             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1222
1223             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1224             ewtabD           = _fjsp_setzero_v2r8();
1225             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1226             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1227             ewtabFn          = _fjsp_setzero_v2r8();
1228             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1229             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1230             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1231             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
1232             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
1233
1234             /* Update potential sum for this i atom from the interaction with this j atom. */
1235             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1236             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1237
1238             fscal            = felec;
1239
1240             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1241
1242             /* Update vectorial force */
1243             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1244             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1245             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1246             
1247             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1248             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1249             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1250
1251             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1252
1253             /* Inner loop uses 458 flops */
1254         }
1255
1256         /* End of innermost loop */
1257
1258         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1259                                               f+i_coord_offset,fshift+i_shift_offset);
1260
1261         ggid                        = gid[iidx];
1262         /* Update potential energies */
1263         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1264         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1265
1266         /* Increment number of inner iterations */
1267         inneriter                  += j_index_end - j_index_start;
1268
1269         /* Outer loop uses 26 flops */
1270     }
1271
1272     /* Increment number of outer iterations */
1273     outeriter        += nri;
1274
1275     /* Update outer/inner flops */
1276
1277     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*458);
1278 }
1279 /*
1280  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
1281  * Electrostatics interaction: Ewald
1282  * VdW interaction:            CubicSplineTable
1283  * Geometry:                   Water4-Water4
1284  * Calculate force/pot:        Force
1285  */
1286 void
1287 nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
1288                     (t_nblist                    * gmx_restrict       nlist,
1289                      rvec                        * gmx_restrict          xx,
1290                      rvec                        * gmx_restrict          ff,
1291                      t_forcerec                  * gmx_restrict          fr,
1292                      t_mdatoms                   * gmx_restrict     mdatoms,
1293                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1294                      t_nrnb                      * gmx_restrict        nrnb)
1295 {
1296     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1297      * just 0 for non-waters.
1298      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1299      * jnr indices corresponding to data put in the two positions in the SIMD register.
1300      */
1301     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1302     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1303     int              jnrA,jnrB;
1304     int              j_coord_offsetA,j_coord_offsetB;
1305     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1306     real             rcutoff_scalar;
1307     real             *shiftvec,*fshift,*x,*f;
1308     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1309     int              vdwioffset0;
1310     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1311     int              vdwioffset1;
1312     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1313     int              vdwioffset2;
1314     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1315     int              vdwioffset3;
1316     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1317     int              vdwjidx0A,vdwjidx0B;
1318     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1319     int              vdwjidx1A,vdwjidx1B;
1320     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1321     int              vdwjidx2A,vdwjidx2B;
1322     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1323     int              vdwjidx3A,vdwjidx3B;
1324     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1325     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1326     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1327     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1328     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1329     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1330     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1331     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1332     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1333     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1334     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1335     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1336     real             *charge;
1337     int              nvdwtype;
1338     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1339     int              *vdwtype;
1340     real             *vdwparam;
1341     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1342     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1343     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1344     real             *vftab;
1345     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1346     real             *ewtab;
1347     _fjsp_v2r8       itab_tmp;
1348     _fjsp_v2r8       dummy_mask,cutoff_mask;
1349     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1350     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1351     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1352
1353     x                = xx[0];
1354     f                = ff[0];
1355
1356     nri              = nlist->nri;
1357     iinr             = nlist->iinr;
1358     jindex           = nlist->jindex;
1359     jjnr             = nlist->jjnr;
1360     shiftidx         = nlist->shift;
1361     gid              = nlist->gid;
1362     shiftvec         = fr->shift_vec[0];
1363     fshift           = fr->fshift[0];
1364     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1365     charge           = mdatoms->chargeA;
1366     nvdwtype         = fr->ntype;
1367     vdwparam         = fr->nbfp;
1368     vdwtype          = mdatoms->typeA;
1369
1370     vftab            = kernel_data->table_vdw->data;
1371     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
1372
1373     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1374     ewtab            = fr->ic->tabq_coul_F;
1375     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1376     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1377
1378     /* Setup water-specific parameters */
1379     inr              = nlist->iinr[0];
1380     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1381     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1382     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1383     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1384
1385     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1386     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1387     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
1388     vdwjidx0A        = 2*vdwtype[inr+0];
1389     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1390     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1391     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1392     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1393     qq13             = _fjsp_mul_v2r8(iq1,jq3);
1394     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1395     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1396     qq23             = _fjsp_mul_v2r8(iq2,jq3);
1397     qq31             = _fjsp_mul_v2r8(iq3,jq1);
1398     qq32             = _fjsp_mul_v2r8(iq3,jq2);
1399     qq33             = _fjsp_mul_v2r8(iq3,jq3);
1400
1401     /* Avoid stupid compiler warnings */
1402     jnrA = jnrB = 0;
1403     j_coord_offsetA = 0;
1404     j_coord_offsetB = 0;
1405
1406     outeriter        = 0;
1407     inneriter        = 0;
1408
1409     /* Start outer loop over neighborlists */
1410     for(iidx=0; iidx<nri; iidx++)
1411     {
1412         /* Load shift vector for this list */
1413         i_shift_offset   = DIM*shiftidx[iidx];
1414
1415         /* Load limits for loop over neighbors */
1416         j_index_start    = jindex[iidx];
1417         j_index_end      = jindex[iidx+1];
1418
1419         /* Get outer coordinate index */
1420         inr              = iinr[iidx];
1421         i_coord_offset   = DIM*inr;
1422
1423         /* Load i particle coords and add shift vector */
1424         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1425                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1426
1427         fix0             = _fjsp_setzero_v2r8();
1428         fiy0             = _fjsp_setzero_v2r8();
1429         fiz0             = _fjsp_setzero_v2r8();
1430         fix1             = _fjsp_setzero_v2r8();
1431         fiy1             = _fjsp_setzero_v2r8();
1432         fiz1             = _fjsp_setzero_v2r8();
1433         fix2             = _fjsp_setzero_v2r8();
1434         fiy2             = _fjsp_setzero_v2r8();
1435         fiz2             = _fjsp_setzero_v2r8();
1436         fix3             = _fjsp_setzero_v2r8();
1437         fiy3             = _fjsp_setzero_v2r8();
1438         fiz3             = _fjsp_setzero_v2r8();
1439
1440         /* Start inner kernel loop */
1441         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1442         {
1443
1444             /* Get j neighbor index, and coordinate index */
1445             jnrA             = jjnr[jidx];
1446             jnrB             = jjnr[jidx+1];
1447             j_coord_offsetA  = DIM*jnrA;
1448             j_coord_offsetB  = DIM*jnrB;
1449
1450             /* load j atom coordinates */
1451             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1452                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1453                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1454
1455             /* Calculate displacement vector */
1456             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1457             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1458             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1459             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1460             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1461             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1462             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1463             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1464             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1465             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1466             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1467             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1468             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1469             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1470             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1471             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1472             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1473             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1474             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1475             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1476             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1477             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1478             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1479             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1480             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1481             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1482             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1483             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1484             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1485             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1486
1487             /* Calculate squared distance and things based on it */
1488             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1489             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1490             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1491             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1492             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1493             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1494             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1495             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1496             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1497             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1498
1499             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1500             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1501             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1502             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1503             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1504             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1505             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1506             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1507             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1508             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1509
1510             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1511             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1512             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1513             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1514             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1515             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1516             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1517             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1518             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1519
1520             fjx0             = _fjsp_setzero_v2r8();
1521             fjy0             = _fjsp_setzero_v2r8();
1522             fjz0             = _fjsp_setzero_v2r8();
1523             fjx1             = _fjsp_setzero_v2r8();
1524             fjy1             = _fjsp_setzero_v2r8();
1525             fjz1             = _fjsp_setzero_v2r8();
1526             fjx2             = _fjsp_setzero_v2r8();
1527             fjy2             = _fjsp_setzero_v2r8();
1528             fjz2             = _fjsp_setzero_v2r8();
1529             fjx3             = _fjsp_setzero_v2r8();
1530             fjy3             = _fjsp_setzero_v2r8();
1531             fjz3             = _fjsp_setzero_v2r8();
1532
1533             /**************************
1534              * CALCULATE INTERACTIONS *
1535              **************************/
1536
1537             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1538
1539             /* Calculate table index by multiplying r with table scale and truncate to integer */
1540             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1541             itab_tmp         = _fjsp_dtox_v2r8(rt);
1542             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1543             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1544             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1545
1546             vfconv.i[0]     *= 8;
1547             vfconv.i[1]     *= 8;
1548
1549             /* CUBIC SPLINE TABLE DISPERSION */
1550             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1551             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1552             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1553             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1554             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1555             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1556             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1557             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1558             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1559
1560             /* CUBIC SPLINE TABLE REPULSION */
1561             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1562             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1563             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1564             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1565             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1566             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1567             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1568             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1569             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1570             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1571
1572             fscal            = fvdw;
1573
1574             /* Update vectorial force */
1575             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1576             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1577             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1578             
1579             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1580             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1581             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1582
1583             /**************************
1584              * CALCULATE INTERACTIONS *
1585              **************************/
1586
1587             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1588
1589             /* EWALD ELECTROSTATICS */
1590
1591             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1592             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1593             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1594             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1595             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1596
1597             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1598                                          &ewtabF,&ewtabFn);
1599             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1600             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1601
1602             fscal            = felec;
1603
1604             /* Update vectorial force */
1605             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1606             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1607             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1608             
1609             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1610             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1611             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1612
1613             /**************************
1614              * CALCULATE INTERACTIONS *
1615              **************************/
1616
1617             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1618
1619             /* EWALD ELECTROSTATICS */
1620
1621             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1622             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1623             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1624             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1625             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1626
1627             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1628                                          &ewtabF,&ewtabFn);
1629             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1630             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1631
1632             fscal            = felec;
1633
1634             /* Update vectorial force */
1635             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1636             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1637             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1638             
1639             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1640             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1641             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1642
1643             /**************************
1644              * CALCULATE INTERACTIONS *
1645              **************************/
1646
1647             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
1648
1649             /* EWALD ELECTROSTATICS */
1650
1651             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1652             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
1653             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1654             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1655             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1656
1657             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1658                                          &ewtabF,&ewtabFn);
1659             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1660             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
1661
1662             fscal            = felec;
1663
1664             /* Update vectorial force */
1665             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1666             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1667             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1668             
1669             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1670             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1671             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1672
1673             /**************************
1674              * CALCULATE INTERACTIONS *
1675              **************************/
1676
1677             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1678
1679             /* EWALD ELECTROSTATICS */
1680
1681             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1682             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1683             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1684             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1685             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1686
1687             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1688                                          &ewtabF,&ewtabFn);
1689             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1690             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1691
1692             fscal            = felec;
1693
1694             /* Update vectorial force */
1695             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1696             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1697             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1698             
1699             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1700             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1701             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1702
1703             /**************************
1704              * CALCULATE INTERACTIONS *
1705              **************************/
1706
1707             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1708
1709             /* EWALD ELECTROSTATICS */
1710
1711             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1712             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1713             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1714             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1715             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1716
1717             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1718                                          &ewtabF,&ewtabFn);
1719             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1720             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1721
1722             fscal            = felec;
1723
1724             /* Update vectorial force */
1725             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1726             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1727             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1728             
1729             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1730             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1731             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1732
1733             /**************************
1734              * CALCULATE INTERACTIONS *
1735              **************************/
1736
1737             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1738
1739             /* EWALD ELECTROSTATICS */
1740
1741             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1742             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
1743             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1744             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1745             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1746
1747             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1748                                          &ewtabF,&ewtabFn);
1749             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1750             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
1751
1752             fscal            = felec;
1753
1754             /* Update vectorial force */
1755             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1756             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1757             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1758             
1759             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1760             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1761             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1762
1763             /**************************
1764              * CALCULATE INTERACTIONS *
1765              **************************/
1766
1767             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1768
1769             /* EWALD ELECTROSTATICS */
1770
1771             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1772             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
1773             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1774             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1775             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1776
1777             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1778                                          &ewtabF,&ewtabFn);
1779             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1780             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
1781
1782             fscal            = felec;
1783
1784             /* Update vectorial force */
1785             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1786             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1787             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1788             
1789             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1790             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1791             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1792
1793             /**************************
1794              * CALCULATE INTERACTIONS *
1795              **************************/
1796
1797             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1798
1799             /* EWALD ELECTROSTATICS */
1800
1801             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1802             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
1803             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1804             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1805             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1806
1807             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1808                                          &ewtabF,&ewtabFn);
1809             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1810             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
1811
1812             fscal            = felec;
1813
1814             /* Update vectorial force */
1815             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1816             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1817             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1818             
1819             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1820             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1821             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1822
1823             /**************************
1824              * CALCULATE INTERACTIONS *
1825              **************************/
1826
1827             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1828
1829             /* EWALD ELECTROSTATICS */
1830
1831             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1832             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
1833             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1834             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1835             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1836
1837             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1838                                          &ewtabF,&ewtabFn);
1839             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1840             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
1841
1842             fscal            = felec;
1843
1844             /* Update vectorial force */
1845             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1846             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1847             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1848             
1849             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1850             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1851             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1852
1853             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1854
1855             /* Inner loop uses 405 flops */
1856         }
1857
1858         if(jidx<j_index_end)
1859         {
1860
1861             jnrA             = jjnr[jidx];
1862             j_coord_offsetA  = DIM*jnrA;
1863
1864             /* load j atom coordinates */
1865             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1866                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1867                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1868
1869             /* Calculate displacement vector */
1870             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1871             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1872             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1873             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1874             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1875             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1876             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1877             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1878             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1879             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1880             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1881             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1882             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1883             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1884             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1885             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1886             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1887             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1888             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1889             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1890             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1891             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1892             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1893             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1894             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1895             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1896             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1897             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1898             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1899             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1900
1901             /* Calculate squared distance and things based on it */
1902             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1903             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1904             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1905             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1906             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1907             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1908             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1909             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1910             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1911             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1912
1913             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1914             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1915             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1916             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1917             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1918             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1919             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1920             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1921             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1922             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1923
1924             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1925             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1926             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1927             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1928             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1929             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1930             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1931             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1932             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1933
1934             fjx0             = _fjsp_setzero_v2r8();
1935             fjy0             = _fjsp_setzero_v2r8();
1936             fjz0             = _fjsp_setzero_v2r8();
1937             fjx1             = _fjsp_setzero_v2r8();
1938             fjy1             = _fjsp_setzero_v2r8();
1939             fjz1             = _fjsp_setzero_v2r8();
1940             fjx2             = _fjsp_setzero_v2r8();
1941             fjy2             = _fjsp_setzero_v2r8();
1942             fjz2             = _fjsp_setzero_v2r8();
1943             fjx3             = _fjsp_setzero_v2r8();
1944             fjy3             = _fjsp_setzero_v2r8();
1945             fjz3             = _fjsp_setzero_v2r8();
1946
1947             /**************************
1948              * CALCULATE INTERACTIONS *
1949              **************************/
1950
1951             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1952
1953             /* Calculate table index by multiplying r with table scale and truncate to integer */
1954             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1955             itab_tmp         = _fjsp_dtox_v2r8(rt);
1956             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1957             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1958             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1959
1960             vfconv.i[0]     *= 8;
1961             vfconv.i[1]     *= 8;
1962
1963             /* CUBIC SPLINE TABLE DISPERSION */
1964             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1965             F                = _fjsp_setzero_v2r8();
1966             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1967             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1968             H                = _fjsp_setzero_v2r8();
1969             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1970             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1971             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1972             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1973
1974             /* CUBIC SPLINE TABLE REPULSION */
1975             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1976             F                = _fjsp_setzero_v2r8();
1977             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1978             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1979             H                = _fjsp_setzero_v2r8();
1980             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1981             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1982             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1983             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1984             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1985
1986             fscal            = fvdw;
1987
1988             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1989
1990             /* Update vectorial force */
1991             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1992             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1993             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1994             
1995             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1996             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1997             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1998
1999             /**************************
2000              * CALCULATE INTERACTIONS *
2001              **************************/
2002
2003             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
2004
2005             /* EWALD ELECTROSTATICS */
2006
2007             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2008             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
2009             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2010             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2011             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2012
2013             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2014             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2015             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
2016
2017             fscal            = felec;
2018
2019             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2020
2021             /* Update vectorial force */
2022             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
2023             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2024             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2025             
2026             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2027             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2028             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2029
2030             /**************************
2031              * CALCULATE INTERACTIONS *
2032              **************************/
2033
2034             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
2035
2036             /* EWALD ELECTROSTATICS */
2037
2038             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2039             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
2040             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2041             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2042             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2043
2044             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2045             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2046             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
2047
2048             fscal            = felec;
2049
2050             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2051
2052             /* Update vectorial force */
2053             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
2054             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2055             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2056             
2057             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2058             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2059             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2060
2061             /**************************
2062              * CALCULATE INTERACTIONS *
2063              **************************/
2064
2065             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
2066
2067             /* EWALD ELECTROSTATICS */
2068
2069             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2070             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
2071             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2072             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2073             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2074
2075             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2076             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2077             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
2078
2079             fscal            = felec;
2080
2081             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2082
2083             /* Update vectorial force */
2084             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
2085             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
2086             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
2087             
2088             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
2089             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
2090             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
2091
2092             /**************************
2093              * CALCULATE INTERACTIONS *
2094              **************************/
2095
2096             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
2097
2098             /* EWALD ELECTROSTATICS */
2099
2100             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2101             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
2102             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2103             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2104             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2105
2106             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2107             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2108             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
2109
2110             fscal            = felec;
2111
2112             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2113
2114             /* Update vectorial force */
2115             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
2116             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2117             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2118             
2119             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2120             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2121             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2122
2123             /**************************
2124              * CALCULATE INTERACTIONS *
2125              **************************/
2126
2127             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
2128
2129             /* EWALD ELECTROSTATICS */
2130
2131             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2132             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
2133             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2134             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2135             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2136
2137             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2138             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2139             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2140
2141             fscal            = felec;
2142
2143             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2144
2145             /* Update vectorial force */
2146             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
2147             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2148             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2149             
2150             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2151             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2152             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2153
2154             /**************************
2155              * CALCULATE INTERACTIONS *
2156              **************************/
2157
2158             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
2159
2160             /* EWALD ELECTROSTATICS */
2161
2162             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2163             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
2164             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2165             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2166             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2167
2168             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2169             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2170             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
2171
2172             fscal            = felec;
2173
2174             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2175
2176             /* Update vectorial force */
2177             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
2178             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
2179             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
2180             
2181             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
2182             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
2183             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
2184
2185             /**************************
2186              * CALCULATE INTERACTIONS *
2187              **************************/
2188
2189             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
2190
2191             /* EWALD ELECTROSTATICS */
2192
2193             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2194             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
2195             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2196             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2197             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2198
2199             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2200             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2201             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
2202
2203             fscal            = felec;
2204
2205             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2206
2207             /* Update vectorial force */
2208             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
2209             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
2210             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
2211             
2212             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
2213             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
2214             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
2215
2216             /**************************
2217              * CALCULATE INTERACTIONS *
2218              **************************/
2219
2220             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
2221
2222             /* EWALD ELECTROSTATICS */
2223
2224             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2225             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
2226             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2227             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2228             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2229
2230             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2231             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2232             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
2233
2234             fscal            = felec;
2235
2236             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2237
2238             /* Update vectorial force */
2239             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
2240             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
2241             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
2242             
2243             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
2244             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
2245             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
2246
2247             /**************************
2248              * CALCULATE INTERACTIONS *
2249              **************************/
2250
2251             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
2252
2253             /* EWALD ELECTROSTATICS */
2254
2255             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2256             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
2257             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2258             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2259             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2260
2261             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2262             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2263             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
2264
2265             fscal            = felec;
2266
2267             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2268
2269             /* Update vectorial force */
2270             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
2271             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
2272             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
2273             
2274             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
2275             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
2276             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
2277
2278             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2279
2280             /* Inner loop uses 405 flops */
2281         }
2282
2283         /* End of innermost loop */
2284
2285         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2286                                               f+i_coord_offset,fshift+i_shift_offset);
2287
2288         /* Increment number of inner iterations */
2289         inneriter                  += j_index_end - j_index_start;
2290
2291         /* Outer loop uses 24 flops */
2292     }
2293
2294     /* Increment number of outer iterations */
2295     outeriter        += nri;
2296
2297     /* Update outer/inner flops */
2298
2299     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*405);
2300 }