Remove no-inline-max-size and suppress remark
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecCSTab_VdwNone_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "gromacs/legacyheaders/vec.h"
47 #include "nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: CubicSplineTable
54  * VdW interaction:            None
55  * Geometry:                   Water4-Water4
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwioffset3;
86     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     int              vdwjidx3A,vdwjidx3B;
92     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
93     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
94     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
95     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
96     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
97     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
98     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
99     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
100     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
101     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
102     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
103     real             *charge;
104     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
105     real             *vftab;
106     _fjsp_v2r8       itab_tmp;
107     _fjsp_v2r8       dummy_mask,cutoff_mask;
108     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
109     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
110     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
111
112     x                = xx[0];
113     f                = ff[0];
114
115     nri              = nlist->nri;
116     iinr             = nlist->iinr;
117     jindex           = nlist->jindex;
118     jjnr             = nlist->jjnr;
119     shiftidx         = nlist->shift;
120     gid              = nlist->gid;
121     shiftvec         = fr->shift_vec[0];
122     fshift           = fr->fshift[0];
123     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
124     charge           = mdatoms->chargeA;
125
126     vftab            = kernel_data->table_elec->data;
127     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
128
129     /* Setup water-specific parameters */
130     inr              = nlist->iinr[0];
131     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
132     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
133     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
134
135     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
136     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
137     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
138     qq11             = _fjsp_mul_v2r8(iq1,jq1);
139     qq12             = _fjsp_mul_v2r8(iq1,jq2);
140     qq13             = _fjsp_mul_v2r8(iq1,jq3);
141     qq21             = _fjsp_mul_v2r8(iq2,jq1);
142     qq22             = _fjsp_mul_v2r8(iq2,jq2);
143     qq23             = _fjsp_mul_v2r8(iq2,jq3);
144     qq31             = _fjsp_mul_v2r8(iq3,jq1);
145     qq32             = _fjsp_mul_v2r8(iq3,jq2);
146     qq33             = _fjsp_mul_v2r8(iq3,jq3);
147
148     /* Avoid stupid compiler warnings */
149     jnrA = jnrB = 0;
150     j_coord_offsetA = 0;
151     j_coord_offsetB = 0;
152
153     outeriter        = 0;
154     inneriter        = 0;
155
156     /* Start outer loop over neighborlists */
157     for(iidx=0; iidx<nri; iidx++)
158     {
159         /* Load shift vector for this list */
160         i_shift_offset   = DIM*shiftidx[iidx];
161
162         /* Load limits for loop over neighbors */
163         j_index_start    = jindex[iidx];
164         j_index_end      = jindex[iidx+1];
165
166         /* Get outer coordinate index */
167         inr              = iinr[iidx];
168         i_coord_offset   = DIM*inr;
169
170         /* Load i particle coords and add shift vector */
171         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
172                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
173
174         fix1             = _fjsp_setzero_v2r8();
175         fiy1             = _fjsp_setzero_v2r8();
176         fiz1             = _fjsp_setzero_v2r8();
177         fix2             = _fjsp_setzero_v2r8();
178         fiy2             = _fjsp_setzero_v2r8();
179         fiz2             = _fjsp_setzero_v2r8();
180         fix3             = _fjsp_setzero_v2r8();
181         fiy3             = _fjsp_setzero_v2r8();
182         fiz3             = _fjsp_setzero_v2r8();
183
184         /* Reset potential sums */
185         velecsum         = _fjsp_setzero_v2r8();
186
187         /* Start inner kernel loop */
188         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
189         {
190
191             /* Get j neighbor index, and coordinate index */
192             jnrA             = jjnr[jidx];
193             jnrB             = jjnr[jidx+1];
194             j_coord_offsetA  = DIM*jnrA;
195             j_coord_offsetB  = DIM*jnrB;
196
197             /* load j atom coordinates */
198             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
199                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
200
201             /* Calculate displacement vector */
202             dx11             = _fjsp_sub_v2r8(ix1,jx1);
203             dy11             = _fjsp_sub_v2r8(iy1,jy1);
204             dz11             = _fjsp_sub_v2r8(iz1,jz1);
205             dx12             = _fjsp_sub_v2r8(ix1,jx2);
206             dy12             = _fjsp_sub_v2r8(iy1,jy2);
207             dz12             = _fjsp_sub_v2r8(iz1,jz2);
208             dx13             = _fjsp_sub_v2r8(ix1,jx3);
209             dy13             = _fjsp_sub_v2r8(iy1,jy3);
210             dz13             = _fjsp_sub_v2r8(iz1,jz3);
211             dx21             = _fjsp_sub_v2r8(ix2,jx1);
212             dy21             = _fjsp_sub_v2r8(iy2,jy1);
213             dz21             = _fjsp_sub_v2r8(iz2,jz1);
214             dx22             = _fjsp_sub_v2r8(ix2,jx2);
215             dy22             = _fjsp_sub_v2r8(iy2,jy2);
216             dz22             = _fjsp_sub_v2r8(iz2,jz2);
217             dx23             = _fjsp_sub_v2r8(ix2,jx3);
218             dy23             = _fjsp_sub_v2r8(iy2,jy3);
219             dz23             = _fjsp_sub_v2r8(iz2,jz3);
220             dx31             = _fjsp_sub_v2r8(ix3,jx1);
221             dy31             = _fjsp_sub_v2r8(iy3,jy1);
222             dz31             = _fjsp_sub_v2r8(iz3,jz1);
223             dx32             = _fjsp_sub_v2r8(ix3,jx2);
224             dy32             = _fjsp_sub_v2r8(iy3,jy2);
225             dz32             = _fjsp_sub_v2r8(iz3,jz2);
226             dx33             = _fjsp_sub_v2r8(ix3,jx3);
227             dy33             = _fjsp_sub_v2r8(iy3,jy3);
228             dz33             = _fjsp_sub_v2r8(iz3,jz3);
229
230             /* Calculate squared distance and things based on it */
231             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
232             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
233             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
234             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
235             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
236             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
237             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
238             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
239             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
240
241             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
242             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
243             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
244             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
245             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
246             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
247             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
248             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
249             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
250
251             fjx1             = _fjsp_setzero_v2r8();
252             fjy1             = _fjsp_setzero_v2r8();
253             fjz1             = _fjsp_setzero_v2r8();
254             fjx2             = _fjsp_setzero_v2r8();
255             fjy2             = _fjsp_setzero_v2r8();
256             fjz2             = _fjsp_setzero_v2r8();
257             fjx3             = _fjsp_setzero_v2r8();
258             fjy3             = _fjsp_setzero_v2r8();
259             fjz3             = _fjsp_setzero_v2r8();
260
261             /**************************
262              * CALCULATE INTERACTIONS *
263              **************************/
264
265             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
266
267             /* Calculate table index by multiplying r with table scale and truncate to integer */
268             rt               = _fjsp_mul_v2r8(r11,vftabscale);
269             itab_tmp         = _fjsp_dtox_v2r8(rt);
270             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
271             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
272             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
273
274             vfconv.i[0]     *= 4;
275             vfconv.i[1]     *= 4;
276
277             /* CUBIC SPLINE TABLE ELECTROSTATICS */
278             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
279             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
280             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
281             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
282             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
283             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
284             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
285             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
286             velec            = _fjsp_mul_v2r8(qq11,VV);
287             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
288             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
289
290             /* Update potential sum for this i atom from the interaction with this j atom. */
291             velecsum         = _fjsp_add_v2r8(velecsum,velec);
292
293             fscal            = felec;
294
295             /* Update vectorial force */
296             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
297             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
298             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
299             
300             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
301             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
302             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
303
304             /**************************
305              * CALCULATE INTERACTIONS *
306              **************************/
307
308             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
309
310             /* Calculate table index by multiplying r with table scale and truncate to integer */
311             rt               = _fjsp_mul_v2r8(r12,vftabscale);
312             itab_tmp         = _fjsp_dtox_v2r8(rt);
313             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
314             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
315             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
316
317             vfconv.i[0]     *= 4;
318             vfconv.i[1]     *= 4;
319
320             /* CUBIC SPLINE TABLE ELECTROSTATICS */
321             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
322             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
323             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
324             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
325             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
326             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
327             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
328             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
329             velec            = _fjsp_mul_v2r8(qq12,VV);
330             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
331             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
332
333             /* Update potential sum for this i atom from the interaction with this j atom. */
334             velecsum         = _fjsp_add_v2r8(velecsum,velec);
335
336             fscal            = felec;
337
338             /* Update vectorial force */
339             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
340             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
341             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
342             
343             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
344             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
345             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
346
347             /**************************
348              * CALCULATE INTERACTIONS *
349              **************************/
350
351             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
352
353             /* Calculate table index by multiplying r with table scale and truncate to integer */
354             rt               = _fjsp_mul_v2r8(r13,vftabscale);
355             itab_tmp         = _fjsp_dtox_v2r8(rt);
356             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
357             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
358             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
359
360             vfconv.i[0]     *= 4;
361             vfconv.i[1]     *= 4;
362
363             /* CUBIC SPLINE TABLE ELECTROSTATICS */
364             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
365             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
366             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
367             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
368             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
369             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
370             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
371             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
372             velec            = _fjsp_mul_v2r8(qq13,VV);
373             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
374             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
375
376             /* Update potential sum for this i atom from the interaction with this j atom. */
377             velecsum         = _fjsp_add_v2r8(velecsum,velec);
378
379             fscal            = felec;
380
381             /* Update vectorial force */
382             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
383             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
384             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
385             
386             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
387             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
388             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
389
390             /**************************
391              * CALCULATE INTERACTIONS *
392              **************************/
393
394             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
395
396             /* Calculate table index by multiplying r with table scale and truncate to integer */
397             rt               = _fjsp_mul_v2r8(r21,vftabscale);
398             itab_tmp         = _fjsp_dtox_v2r8(rt);
399             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
400             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
401             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
402
403             vfconv.i[0]     *= 4;
404             vfconv.i[1]     *= 4;
405
406             /* CUBIC SPLINE TABLE ELECTROSTATICS */
407             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
408             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
409             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
410             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
411             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
412             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
413             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
414             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
415             velec            = _fjsp_mul_v2r8(qq21,VV);
416             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
417             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
418
419             /* Update potential sum for this i atom from the interaction with this j atom. */
420             velecsum         = _fjsp_add_v2r8(velecsum,velec);
421
422             fscal            = felec;
423
424             /* Update vectorial force */
425             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
426             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
427             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
428             
429             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
430             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
431             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
432
433             /**************************
434              * CALCULATE INTERACTIONS *
435              **************************/
436
437             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
438
439             /* Calculate table index by multiplying r with table scale and truncate to integer */
440             rt               = _fjsp_mul_v2r8(r22,vftabscale);
441             itab_tmp         = _fjsp_dtox_v2r8(rt);
442             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
443             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
444             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
445
446             vfconv.i[0]     *= 4;
447             vfconv.i[1]     *= 4;
448
449             /* CUBIC SPLINE TABLE ELECTROSTATICS */
450             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
451             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
452             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
453             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
454             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
455             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
456             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
457             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
458             velec            = _fjsp_mul_v2r8(qq22,VV);
459             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
460             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
461
462             /* Update potential sum for this i atom from the interaction with this j atom. */
463             velecsum         = _fjsp_add_v2r8(velecsum,velec);
464
465             fscal            = felec;
466
467             /* Update vectorial force */
468             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
469             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
470             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
471             
472             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
473             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
474             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
475
476             /**************************
477              * CALCULATE INTERACTIONS *
478              **************************/
479
480             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
481
482             /* Calculate table index by multiplying r with table scale and truncate to integer */
483             rt               = _fjsp_mul_v2r8(r23,vftabscale);
484             itab_tmp         = _fjsp_dtox_v2r8(rt);
485             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
486             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
487             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
488
489             vfconv.i[0]     *= 4;
490             vfconv.i[1]     *= 4;
491
492             /* CUBIC SPLINE TABLE ELECTROSTATICS */
493             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
494             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
495             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
496             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
497             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
498             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
499             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
500             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
501             velec            = _fjsp_mul_v2r8(qq23,VV);
502             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
503             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
504
505             /* Update potential sum for this i atom from the interaction with this j atom. */
506             velecsum         = _fjsp_add_v2r8(velecsum,velec);
507
508             fscal            = felec;
509
510             /* Update vectorial force */
511             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
512             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
513             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
514             
515             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
516             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
517             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
518
519             /**************************
520              * CALCULATE INTERACTIONS *
521              **************************/
522
523             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
524
525             /* Calculate table index by multiplying r with table scale and truncate to integer */
526             rt               = _fjsp_mul_v2r8(r31,vftabscale);
527             itab_tmp         = _fjsp_dtox_v2r8(rt);
528             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
529             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
530             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
531
532             vfconv.i[0]     *= 4;
533             vfconv.i[1]     *= 4;
534
535             /* CUBIC SPLINE TABLE ELECTROSTATICS */
536             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
537             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
538             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
539             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
540             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
541             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
542             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
543             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
544             velec            = _fjsp_mul_v2r8(qq31,VV);
545             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
546             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
547
548             /* Update potential sum for this i atom from the interaction with this j atom. */
549             velecsum         = _fjsp_add_v2r8(velecsum,velec);
550
551             fscal            = felec;
552
553             /* Update vectorial force */
554             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
555             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
556             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
557             
558             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
559             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
560             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
561
562             /**************************
563              * CALCULATE INTERACTIONS *
564              **************************/
565
566             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
567
568             /* Calculate table index by multiplying r with table scale and truncate to integer */
569             rt               = _fjsp_mul_v2r8(r32,vftabscale);
570             itab_tmp         = _fjsp_dtox_v2r8(rt);
571             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
572             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
573             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
574
575             vfconv.i[0]     *= 4;
576             vfconv.i[1]     *= 4;
577
578             /* CUBIC SPLINE TABLE ELECTROSTATICS */
579             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
580             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
581             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
582             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
583             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
584             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
585             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
586             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
587             velec            = _fjsp_mul_v2r8(qq32,VV);
588             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
589             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
590
591             /* Update potential sum for this i atom from the interaction with this j atom. */
592             velecsum         = _fjsp_add_v2r8(velecsum,velec);
593
594             fscal            = felec;
595
596             /* Update vectorial force */
597             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
598             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
599             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
600             
601             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
602             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
603             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
604
605             /**************************
606              * CALCULATE INTERACTIONS *
607              **************************/
608
609             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
610
611             /* Calculate table index by multiplying r with table scale and truncate to integer */
612             rt               = _fjsp_mul_v2r8(r33,vftabscale);
613             itab_tmp         = _fjsp_dtox_v2r8(rt);
614             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
615             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
616             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
617
618             vfconv.i[0]     *= 4;
619             vfconv.i[1]     *= 4;
620
621             /* CUBIC SPLINE TABLE ELECTROSTATICS */
622             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
623             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
624             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
625             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
626             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
627             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
628             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
629             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
630             velec            = _fjsp_mul_v2r8(qq33,VV);
631             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
632             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
633
634             /* Update potential sum for this i atom from the interaction with this j atom. */
635             velecsum         = _fjsp_add_v2r8(velecsum,velec);
636
637             fscal            = felec;
638
639             /* Update vectorial force */
640             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
641             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
642             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
643             
644             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
645             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
646             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
647
648             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
649
650             /* Inner loop uses 414 flops */
651         }
652
653         if(jidx<j_index_end)
654         {
655
656             jnrA             = jjnr[jidx];
657             j_coord_offsetA  = DIM*jnrA;
658
659             /* load j atom coordinates */
660             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
661                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
662
663             /* Calculate displacement vector */
664             dx11             = _fjsp_sub_v2r8(ix1,jx1);
665             dy11             = _fjsp_sub_v2r8(iy1,jy1);
666             dz11             = _fjsp_sub_v2r8(iz1,jz1);
667             dx12             = _fjsp_sub_v2r8(ix1,jx2);
668             dy12             = _fjsp_sub_v2r8(iy1,jy2);
669             dz12             = _fjsp_sub_v2r8(iz1,jz2);
670             dx13             = _fjsp_sub_v2r8(ix1,jx3);
671             dy13             = _fjsp_sub_v2r8(iy1,jy3);
672             dz13             = _fjsp_sub_v2r8(iz1,jz3);
673             dx21             = _fjsp_sub_v2r8(ix2,jx1);
674             dy21             = _fjsp_sub_v2r8(iy2,jy1);
675             dz21             = _fjsp_sub_v2r8(iz2,jz1);
676             dx22             = _fjsp_sub_v2r8(ix2,jx2);
677             dy22             = _fjsp_sub_v2r8(iy2,jy2);
678             dz22             = _fjsp_sub_v2r8(iz2,jz2);
679             dx23             = _fjsp_sub_v2r8(ix2,jx3);
680             dy23             = _fjsp_sub_v2r8(iy2,jy3);
681             dz23             = _fjsp_sub_v2r8(iz2,jz3);
682             dx31             = _fjsp_sub_v2r8(ix3,jx1);
683             dy31             = _fjsp_sub_v2r8(iy3,jy1);
684             dz31             = _fjsp_sub_v2r8(iz3,jz1);
685             dx32             = _fjsp_sub_v2r8(ix3,jx2);
686             dy32             = _fjsp_sub_v2r8(iy3,jy2);
687             dz32             = _fjsp_sub_v2r8(iz3,jz2);
688             dx33             = _fjsp_sub_v2r8(ix3,jx3);
689             dy33             = _fjsp_sub_v2r8(iy3,jy3);
690             dz33             = _fjsp_sub_v2r8(iz3,jz3);
691
692             /* Calculate squared distance and things based on it */
693             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
694             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
695             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
696             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
697             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
698             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
699             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
700             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
701             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
702
703             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
704             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
705             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
706             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
707             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
708             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
709             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
710             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
711             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
712
713             fjx1             = _fjsp_setzero_v2r8();
714             fjy1             = _fjsp_setzero_v2r8();
715             fjz1             = _fjsp_setzero_v2r8();
716             fjx2             = _fjsp_setzero_v2r8();
717             fjy2             = _fjsp_setzero_v2r8();
718             fjz2             = _fjsp_setzero_v2r8();
719             fjx3             = _fjsp_setzero_v2r8();
720             fjy3             = _fjsp_setzero_v2r8();
721             fjz3             = _fjsp_setzero_v2r8();
722
723             /**************************
724              * CALCULATE INTERACTIONS *
725              **************************/
726
727             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
728
729             /* Calculate table index by multiplying r with table scale and truncate to integer */
730             rt               = _fjsp_mul_v2r8(r11,vftabscale);
731             itab_tmp         = _fjsp_dtox_v2r8(rt);
732             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
733             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
734             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
735
736             vfconv.i[0]     *= 4;
737             vfconv.i[1]     *= 4;
738
739             /* CUBIC SPLINE TABLE ELECTROSTATICS */
740             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
741             F                = _fjsp_setzero_v2r8();
742             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
743             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
744             H                = _fjsp_setzero_v2r8();
745             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
746             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
747             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
748             velec            = _fjsp_mul_v2r8(qq11,VV);
749             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
750             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
751
752             /* Update potential sum for this i atom from the interaction with this j atom. */
753             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
754             velecsum         = _fjsp_add_v2r8(velecsum,velec);
755
756             fscal            = felec;
757
758             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
759
760             /* Update vectorial force */
761             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
762             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
763             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
764             
765             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
766             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
767             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
768
769             /**************************
770              * CALCULATE INTERACTIONS *
771              **************************/
772
773             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
774
775             /* Calculate table index by multiplying r with table scale and truncate to integer */
776             rt               = _fjsp_mul_v2r8(r12,vftabscale);
777             itab_tmp         = _fjsp_dtox_v2r8(rt);
778             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
779             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
780             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
781
782             vfconv.i[0]     *= 4;
783             vfconv.i[1]     *= 4;
784
785             /* CUBIC SPLINE TABLE ELECTROSTATICS */
786             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
787             F                = _fjsp_setzero_v2r8();
788             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
789             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
790             H                = _fjsp_setzero_v2r8();
791             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
792             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
793             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
794             velec            = _fjsp_mul_v2r8(qq12,VV);
795             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
796             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
797
798             /* Update potential sum for this i atom from the interaction with this j atom. */
799             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
800             velecsum         = _fjsp_add_v2r8(velecsum,velec);
801
802             fscal            = felec;
803
804             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
805
806             /* Update vectorial force */
807             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
808             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
809             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
810             
811             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
812             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
813             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
814
815             /**************************
816              * CALCULATE INTERACTIONS *
817              **************************/
818
819             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
820
821             /* Calculate table index by multiplying r with table scale and truncate to integer */
822             rt               = _fjsp_mul_v2r8(r13,vftabscale);
823             itab_tmp         = _fjsp_dtox_v2r8(rt);
824             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
825             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
826             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
827
828             vfconv.i[0]     *= 4;
829             vfconv.i[1]     *= 4;
830
831             /* CUBIC SPLINE TABLE ELECTROSTATICS */
832             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
833             F                = _fjsp_setzero_v2r8();
834             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
835             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
836             H                = _fjsp_setzero_v2r8();
837             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
838             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
839             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
840             velec            = _fjsp_mul_v2r8(qq13,VV);
841             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
842             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
843
844             /* Update potential sum for this i atom from the interaction with this j atom. */
845             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
846             velecsum         = _fjsp_add_v2r8(velecsum,velec);
847
848             fscal            = felec;
849
850             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
851
852             /* Update vectorial force */
853             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
854             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
855             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
856             
857             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
858             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
859             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
860
861             /**************************
862              * CALCULATE INTERACTIONS *
863              **************************/
864
865             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
866
867             /* Calculate table index by multiplying r with table scale and truncate to integer */
868             rt               = _fjsp_mul_v2r8(r21,vftabscale);
869             itab_tmp         = _fjsp_dtox_v2r8(rt);
870             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
871             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
872             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
873
874             vfconv.i[0]     *= 4;
875             vfconv.i[1]     *= 4;
876
877             /* CUBIC SPLINE TABLE ELECTROSTATICS */
878             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
879             F                = _fjsp_setzero_v2r8();
880             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
881             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
882             H                = _fjsp_setzero_v2r8();
883             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
884             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
885             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
886             velec            = _fjsp_mul_v2r8(qq21,VV);
887             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
888             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
889
890             /* Update potential sum for this i atom from the interaction with this j atom. */
891             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
892             velecsum         = _fjsp_add_v2r8(velecsum,velec);
893
894             fscal            = felec;
895
896             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
897
898             /* Update vectorial force */
899             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
900             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
901             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
902             
903             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
904             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
905             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
906
907             /**************************
908              * CALCULATE INTERACTIONS *
909              **************************/
910
911             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
912
913             /* Calculate table index by multiplying r with table scale and truncate to integer */
914             rt               = _fjsp_mul_v2r8(r22,vftabscale);
915             itab_tmp         = _fjsp_dtox_v2r8(rt);
916             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
917             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
918             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
919
920             vfconv.i[0]     *= 4;
921             vfconv.i[1]     *= 4;
922
923             /* CUBIC SPLINE TABLE ELECTROSTATICS */
924             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
925             F                = _fjsp_setzero_v2r8();
926             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
927             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
928             H                = _fjsp_setzero_v2r8();
929             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
930             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
931             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
932             velec            = _fjsp_mul_v2r8(qq22,VV);
933             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
934             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
935
936             /* Update potential sum for this i atom from the interaction with this j atom. */
937             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
938             velecsum         = _fjsp_add_v2r8(velecsum,velec);
939
940             fscal            = felec;
941
942             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
943
944             /* Update vectorial force */
945             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
946             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
947             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
948             
949             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
950             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
951             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
952
953             /**************************
954              * CALCULATE INTERACTIONS *
955              **************************/
956
957             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
958
959             /* Calculate table index by multiplying r with table scale and truncate to integer */
960             rt               = _fjsp_mul_v2r8(r23,vftabscale);
961             itab_tmp         = _fjsp_dtox_v2r8(rt);
962             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
963             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
964             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
965
966             vfconv.i[0]     *= 4;
967             vfconv.i[1]     *= 4;
968
969             /* CUBIC SPLINE TABLE ELECTROSTATICS */
970             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
971             F                = _fjsp_setzero_v2r8();
972             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
973             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
974             H                = _fjsp_setzero_v2r8();
975             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
976             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
977             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
978             velec            = _fjsp_mul_v2r8(qq23,VV);
979             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
980             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
981
982             /* Update potential sum for this i atom from the interaction with this j atom. */
983             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
984             velecsum         = _fjsp_add_v2r8(velecsum,velec);
985
986             fscal            = felec;
987
988             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
989
990             /* Update vectorial force */
991             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
992             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
993             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
994             
995             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
996             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
997             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
998
999             /**************************
1000              * CALCULATE INTERACTIONS *
1001              **************************/
1002
1003             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1004
1005             /* Calculate table index by multiplying r with table scale and truncate to integer */
1006             rt               = _fjsp_mul_v2r8(r31,vftabscale);
1007             itab_tmp         = _fjsp_dtox_v2r8(rt);
1008             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1009             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1010             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1011
1012             vfconv.i[0]     *= 4;
1013             vfconv.i[1]     *= 4;
1014
1015             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1016             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1017             F                = _fjsp_setzero_v2r8();
1018             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1019             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1020             H                = _fjsp_setzero_v2r8();
1021             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1022             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1023             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1024             velec            = _fjsp_mul_v2r8(qq31,VV);
1025             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1026             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
1027
1028             /* Update potential sum for this i atom from the interaction with this j atom. */
1029             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1030             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1031
1032             fscal            = felec;
1033
1034             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1035
1036             /* Update vectorial force */
1037             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1038             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1039             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1040             
1041             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1042             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1043             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1044
1045             /**************************
1046              * CALCULATE INTERACTIONS *
1047              **************************/
1048
1049             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1050
1051             /* Calculate table index by multiplying r with table scale and truncate to integer */
1052             rt               = _fjsp_mul_v2r8(r32,vftabscale);
1053             itab_tmp         = _fjsp_dtox_v2r8(rt);
1054             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1055             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1056             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1057
1058             vfconv.i[0]     *= 4;
1059             vfconv.i[1]     *= 4;
1060
1061             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1062             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1063             F                = _fjsp_setzero_v2r8();
1064             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1065             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1066             H                = _fjsp_setzero_v2r8();
1067             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1068             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1069             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1070             velec            = _fjsp_mul_v2r8(qq32,VV);
1071             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1072             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
1073
1074             /* Update potential sum for this i atom from the interaction with this j atom. */
1075             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1076             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1077
1078             fscal            = felec;
1079
1080             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1081
1082             /* Update vectorial force */
1083             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1084             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1085             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1086             
1087             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1088             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1089             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1090
1091             /**************************
1092              * CALCULATE INTERACTIONS *
1093              **************************/
1094
1095             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1096
1097             /* Calculate table index by multiplying r with table scale and truncate to integer */
1098             rt               = _fjsp_mul_v2r8(r33,vftabscale);
1099             itab_tmp         = _fjsp_dtox_v2r8(rt);
1100             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1101             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1102             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1103
1104             vfconv.i[0]     *= 4;
1105             vfconv.i[1]     *= 4;
1106
1107             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1108             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1109             F                = _fjsp_setzero_v2r8();
1110             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1111             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1112             H                = _fjsp_setzero_v2r8();
1113             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1114             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1115             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
1116             velec            = _fjsp_mul_v2r8(qq33,VV);
1117             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1118             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
1119
1120             /* Update potential sum for this i atom from the interaction with this j atom. */
1121             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1122             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1123
1124             fscal            = felec;
1125
1126             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1127
1128             /* Update vectorial force */
1129             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1130             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1131             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1132             
1133             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1134             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1135             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1136
1137             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1138
1139             /* Inner loop uses 414 flops */
1140         }
1141
1142         /* End of innermost loop */
1143
1144         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1145                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
1146
1147         ggid                        = gid[iidx];
1148         /* Update potential energies */
1149         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1150
1151         /* Increment number of inner iterations */
1152         inneriter                  += j_index_end - j_index_start;
1153
1154         /* Outer loop uses 19 flops */
1155     }
1156
1157     /* Increment number of outer iterations */
1158     outeriter        += nri;
1159
1160     /* Update outer/inner flops */
1161
1162     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*414);
1163 }
1164 /*
1165  * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
1166  * Electrostatics interaction: CubicSplineTable
1167  * VdW interaction:            None
1168  * Geometry:                   Water4-Water4
1169  * Calculate force/pot:        Force
1170  */
1171 void
1172 nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
1173                     (t_nblist                    * gmx_restrict       nlist,
1174                      rvec                        * gmx_restrict          xx,
1175                      rvec                        * gmx_restrict          ff,
1176                      t_forcerec                  * gmx_restrict          fr,
1177                      t_mdatoms                   * gmx_restrict     mdatoms,
1178                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1179                      t_nrnb                      * gmx_restrict        nrnb)
1180 {
1181     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1182      * just 0 for non-waters.
1183      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1184      * jnr indices corresponding to data put in the two positions in the SIMD register.
1185      */
1186     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1187     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1188     int              jnrA,jnrB;
1189     int              j_coord_offsetA,j_coord_offsetB;
1190     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1191     real             rcutoff_scalar;
1192     real             *shiftvec,*fshift,*x,*f;
1193     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1194     int              vdwioffset1;
1195     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1196     int              vdwioffset2;
1197     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1198     int              vdwioffset3;
1199     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1200     int              vdwjidx1A,vdwjidx1B;
1201     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1202     int              vdwjidx2A,vdwjidx2B;
1203     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1204     int              vdwjidx3A,vdwjidx3B;
1205     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1206     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1207     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1208     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1209     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1210     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1211     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1212     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1213     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1214     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1215     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1216     real             *charge;
1217     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1218     real             *vftab;
1219     _fjsp_v2r8       itab_tmp;
1220     _fjsp_v2r8       dummy_mask,cutoff_mask;
1221     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1222     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1223     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1224
1225     x                = xx[0];
1226     f                = ff[0];
1227
1228     nri              = nlist->nri;
1229     iinr             = nlist->iinr;
1230     jindex           = nlist->jindex;
1231     jjnr             = nlist->jjnr;
1232     shiftidx         = nlist->shift;
1233     gid              = nlist->gid;
1234     shiftvec         = fr->shift_vec[0];
1235     fshift           = fr->fshift[0];
1236     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1237     charge           = mdatoms->chargeA;
1238
1239     vftab            = kernel_data->table_elec->data;
1240     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
1241
1242     /* Setup water-specific parameters */
1243     inr              = nlist->iinr[0];
1244     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1245     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1246     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1247
1248     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1249     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1250     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
1251     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1252     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1253     qq13             = _fjsp_mul_v2r8(iq1,jq3);
1254     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1255     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1256     qq23             = _fjsp_mul_v2r8(iq2,jq3);
1257     qq31             = _fjsp_mul_v2r8(iq3,jq1);
1258     qq32             = _fjsp_mul_v2r8(iq3,jq2);
1259     qq33             = _fjsp_mul_v2r8(iq3,jq3);
1260
1261     /* Avoid stupid compiler warnings */
1262     jnrA = jnrB = 0;
1263     j_coord_offsetA = 0;
1264     j_coord_offsetB = 0;
1265
1266     outeriter        = 0;
1267     inneriter        = 0;
1268
1269     /* Start outer loop over neighborlists */
1270     for(iidx=0; iidx<nri; iidx++)
1271     {
1272         /* Load shift vector for this list */
1273         i_shift_offset   = DIM*shiftidx[iidx];
1274
1275         /* Load limits for loop over neighbors */
1276         j_index_start    = jindex[iidx];
1277         j_index_end      = jindex[iidx+1];
1278
1279         /* Get outer coordinate index */
1280         inr              = iinr[iidx];
1281         i_coord_offset   = DIM*inr;
1282
1283         /* Load i particle coords and add shift vector */
1284         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
1285                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1286
1287         fix1             = _fjsp_setzero_v2r8();
1288         fiy1             = _fjsp_setzero_v2r8();
1289         fiz1             = _fjsp_setzero_v2r8();
1290         fix2             = _fjsp_setzero_v2r8();
1291         fiy2             = _fjsp_setzero_v2r8();
1292         fiz2             = _fjsp_setzero_v2r8();
1293         fix3             = _fjsp_setzero_v2r8();
1294         fiy3             = _fjsp_setzero_v2r8();
1295         fiz3             = _fjsp_setzero_v2r8();
1296
1297         /* Start inner kernel loop */
1298         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1299         {
1300
1301             /* Get j neighbor index, and coordinate index */
1302             jnrA             = jjnr[jidx];
1303             jnrB             = jjnr[jidx+1];
1304             j_coord_offsetA  = DIM*jnrA;
1305             j_coord_offsetB  = DIM*jnrB;
1306
1307             /* load j atom coordinates */
1308             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
1309                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1310
1311             /* Calculate displacement vector */
1312             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1313             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1314             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1315             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1316             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1317             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1318             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1319             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1320             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1321             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1322             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1323             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1324             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1325             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1326             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1327             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1328             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1329             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1330             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1331             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1332             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1333             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1334             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1335             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1336             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1337             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1338             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1339
1340             /* Calculate squared distance and things based on it */
1341             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1342             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1343             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1344             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1345             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1346             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1347             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1348             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1349             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1350
1351             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1352             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1353             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1354             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1355             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1356             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1357             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1358             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1359             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1360
1361             fjx1             = _fjsp_setzero_v2r8();
1362             fjy1             = _fjsp_setzero_v2r8();
1363             fjz1             = _fjsp_setzero_v2r8();
1364             fjx2             = _fjsp_setzero_v2r8();
1365             fjy2             = _fjsp_setzero_v2r8();
1366             fjz2             = _fjsp_setzero_v2r8();
1367             fjx3             = _fjsp_setzero_v2r8();
1368             fjy3             = _fjsp_setzero_v2r8();
1369             fjz3             = _fjsp_setzero_v2r8();
1370
1371             /**************************
1372              * CALCULATE INTERACTIONS *
1373              **************************/
1374
1375             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1376
1377             /* Calculate table index by multiplying r with table scale and truncate to integer */
1378             rt               = _fjsp_mul_v2r8(r11,vftabscale);
1379             itab_tmp         = _fjsp_dtox_v2r8(rt);
1380             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1381             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1382             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1383
1384             vfconv.i[0]     *= 4;
1385             vfconv.i[1]     *= 4;
1386
1387             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1388             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1389             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1390             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1391             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1392             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1393             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1394             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1395             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1396             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1397
1398             fscal            = felec;
1399
1400             /* Update vectorial force */
1401             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1402             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1403             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1404             
1405             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1406             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1407             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1408
1409             /**************************
1410              * CALCULATE INTERACTIONS *
1411              **************************/
1412
1413             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1414
1415             /* Calculate table index by multiplying r with table scale and truncate to integer */
1416             rt               = _fjsp_mul_v2r8(r12,vftabscale);
1417             itab_tmp         = _fjsp_dtox_v2r8(rt);
1418             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1419             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1420             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1421
1422             vfconv.i[0]     *= 4;
1423             vfconv.i[1]     *= 4;
1424
1425             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1426             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1427             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1428             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1429             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1430             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1431             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1432             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1433             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1434             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1435
1436             fscal            = felec;
1437
1438             /* Update vectorial force */
1439             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1440             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1441             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1442             
1443             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1444             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1445             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1446
1447             /**************************
1448              * CALCULATE INTERACTIONS *
1449              **************************/
1450
1451             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
1452
1453             /* Calculate table index by multiplying r with table scale and truncate to integer */
1454             rt               = _fjsp_mul_v2r8(r13,vftabscale);
1455             itab_tmp         = _fjsp_dtox_v2r8(rt);
1456             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1457             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1458             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1459
1460             vfconv.i[0]     *= 4;
1461             vfconv.i[1]     *= 4;
1462
1463             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1464             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1465             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1466             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1467             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1468             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1469             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1470             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1471             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1472             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
1473
1474             fscal            = felec;
1475
1476             /* Update vectorial force */
1477             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1478             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1479             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1480             
1481             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1482             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1483             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1484
1485             /**************************
1486              * CALCULATE INTERACTIONS *
1487              **************************/
1488
1489             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1490
1491             /* Calculate table index by multiplying r with table scale and truncate to integer */
1492             rt               = _fjsp_mul_v2r8(r21,vftabscale);
1493             itab_tmp         = _fjsp_dtox_v2r8(rt);
1494             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1495             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1496             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1497
1498             vfconv.i[0]     *= 4;
1499             vfconv.i[1]     *= 4;
1500
1501             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1502             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1503             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1504             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1505             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1506             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1507             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1508             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1509             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1510             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1511
1512             fscal            = felec;
1513
1514             /* Update vectorial force */
1515             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1516             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1517             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1518             
1519             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1520             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1521             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1522
1523             /**************************
1524              * CALCULATE INTERACTIONS *
1525              **************************/
1526
1527             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1528
1529             /* Calculate table index by multiplying r with table scale and truncate to integer */
1530             rt               = _fjsp_mul_v2r8(r22,vftabscale);
1531             itab_tmp         = _fjsp_dtox_v2r8(rt);
1532             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1533             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1534             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1535
1536             vfconv.i[0]     *= 4;
1537             vfconv.i[1]     *= 4;
1538
1539             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1540             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1541             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1542             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1543             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1544             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1545             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1546             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1547             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1548             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1549
1550             fscal            = felec;
1551
1552             /* Update vectorial force */
1553             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1554             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1555             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1556             
1557             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1558             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1559             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1560
1561             /**************************
1562              * CALCULATE INTERACTIONS *
1563              **************************/
1564
1565             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1566
1567             /* Calculate table index by multiplying r with table scale and truncate to integer */
1568             rt               = _fjsp_mul_v2r8(r23,vftabscale);
1569             itab_tmp         = _fjsp_dtox_v2r8(rt);
1570             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1571             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1572             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1573
1574             vfconv.i[0]     *= 4;
1575             vfconv.i[1]     *= 4;
1576
1577             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1578             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1579             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1580             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1581             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1582             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1583             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1584             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1585             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1586             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
1587
1588             fscal            = felec;
1589
1590             /* Update vectorial force */
1591             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1592             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1593             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1594             
1595             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1596             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1597             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1598
1599             /**************************
1600              * CALCULATE INTERACTIONS *
1601              **************************/
1602
1603             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1604
1605             /* Calculate table index by multiplying r with table scale and truncate to integer */
1606             rt               = _fjsp_mul_v2r8(r31,vftabscale);
1607             itab_tmp         = _fjsp_dtox_v2r8(rt);
1608             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1609             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1610             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1611
1612             vfconv.i[0]     *= 4;
1613             vfconv.i[1]     *= 4;
1614
1615             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1616             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1617             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1618             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1619             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1620             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1621             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1622             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1623             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1624             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
1625
1626             fscal            = felec;
1627
1628             /* Update vectorial force */
1629             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1630             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1631             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1632             
1633             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1634             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1635             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1636
1637             /**************************
1638              * CALCULATE INTERACTIONS *
1639              **************************/
1640
1641             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1642
1643             /* Calculate table index by multiplying r with table scale and truncate to integer */
1644             rt               = _fjsp_mul_v2r8(r32,vftabscale);
1645             itab_tmp         = _fjsp_dtox_v2r8(rt);
1646             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1647             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1648             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1649
1650             vfconv.i[0]     *= 4;
1651             vfconv.i[1]     *= 4;
1652
1653             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1654             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1655             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1656             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1657             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1658             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1659             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1660             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1661             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1662             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
1663
1664             fscal            = felec;
1665
1666             /* Update vectorial force */
1667             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1668             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1669             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1670             
1671             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1672             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1673             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1674
1675             /**************************
1676              * CALCULATE INTERACTIONS *
1677              **************************/
1678
1679             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1680
1681             /* Calculate table index by multiplying r with table scale and truncate to integer */
1682             rt               = _fjsp_mul_v2r8(r33,vftabscale);
1683             itab_tmp         = _fjsp_dtox_v2r8(rt);
1684             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1685             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1686             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1687
1688             vfconv.i[0]     *= 4;
1689             vfconv.i[1]     *= 4;
1690
1691             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1692             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1693             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1694             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1695             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1696             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
1697             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1698             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1699             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1700             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
1701
1702             fscal            = felec;
1703
1704             /* Update vectorial force */
1705             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1706             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1707             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1708             
1709             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1710             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1711             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1712
1713             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1714
1715             /* Inner loop uses 378 flops */
1716         }
1717
1718         if(jidx<j_index_end)
1719         {
1720
1721             jnrA             = jjnr[jidx];
1722             j_coord_offsetA  = DIM*jnrA;
1723
1724             /* load j atom coordinates */
1725             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
1726                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1727
1728             /* Calculate displacement vector */
1729             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1730             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1731             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1732             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1733             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1734             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1735             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1736             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1737             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1738             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1739             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1740             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1741             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1742             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1743             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1744             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1745             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1746             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1747             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1748             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1749             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1750             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1751             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1752             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1753             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1754             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1755             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1756
1757             /* Calculate squared distance and things based on it */
1758             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1759             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1760             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1761             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1762             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1763             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1764             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1765             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1766             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1767
1768             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1769             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1770             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1771             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1772             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1773             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1774             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1775             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1776             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1777
1778             fjx1             = _fjsp_setzero_v2r8();
1779             fjy1             = _fjsp_setzero_v2r8();
1780             fjz1             = _fjsp_setzero_v2r8();
1781             fjx2             = _fjsp_setzero_v2r8();
1782             fjy2             = _fjsp_setzero_v2r8();
1783             fjz2             = _fjsp_setzero_v2r8();
1784             fjx3             = _fjsp_setzero_v2r8();
1785             fjy3             = _fjsp_setzero_v2r8();
1786             fjz3             = _fjsp_setzero_v2r8();
1787
1788             /**************************
1789              * CALCULATE INTERACTIONS *
1790              **************************/
1791
1792             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1793
1794             /* Calculate table index by multiplying r with table scale and truncate to integer */
1795             rt               = _fjsp_mul_v2r8(r11,vftabscale);
1796             itab_tmp         = _fjsp_dtox_v2r8(rt);
1797             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1798             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1799             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1800
1801             vfconv.i[0]     *= 4;
1802             vfconv.i[1]     *= 4;
1803
1804             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1805             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1806             F                = _fjsp_setzero_v2r8();
1807             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1808             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1809             H                = _fjsp_setzero_v2r8();
1810             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1811             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1812             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1813             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
1814
1815             fscal            = felec;
1816
1817             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1818
1819             /* Update vectorial force */
1820             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1821             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1822             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1823             
1824             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1825             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1826             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1827
1828             /**************************
1829              * CALCULATE INTERACTIONS *
1830              **************************/
1831
1832             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1833
1834             /* Calculate table index by multiplying r with table scale and truncate to integer */
1835             rt               = _fjsp_mul_v2r8(r12,vftabscale);
1836             itab_tmp         = _fjsp_dtox_v2r8(rt);
1837             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1838             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1839             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1840
1841             vfconv.i[0]     *= 4;
1842             vfconv.i[1]     *= 4;
1843
1844             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1845             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1846             F                = _fjsp_setzero_v2r8();
1847             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1848             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1849             H                = _fjsp_setzero_v2r8();
1850             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1851             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1852             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1853             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
1854
1855             fscal            = felec;
1856
1857             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1858
1859             /* Update vectorial force */
1860             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1861             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1862             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1863             
1864             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1865             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1866             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1867
1868             /**************************
1869              * CALCULATE INTERACTIONS *
1870              **************************/
1871
1872             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
1873
1874             /* Calculate table index by multiplying r with table scale and truncate to integer */
1875             rt               = _fjsp_mul_v2r8(r13,vftabscale);
1876             itab_tmp         = _fjsp_dtox_v2r8(rt);
1877             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1878             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1879             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1880
1881             vfconv.i[0]     *= 4;
1882             vfconv.i[1]     *= 4;
1883
1884             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1885             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1886             F                = _fjsp_setzero_v2r8();
1887             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1888             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1889             H                = _fjsp_setzero_v2r8();
1890             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1891             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1892             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1893             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
1894
1895             fscal            = felec;
1896
1897             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1898
1899             /* Update vectorial force */
1900             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1901             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1902             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1903             
1904             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1905             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1906             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1907
1908             /**************************
1909              * CALCULATE INTERACTIONS *
1910              **************************/
1911
1912             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1913
1914             /* Calculate table index by multiplying r with table scale and truncate to integer */
1915             rt               = _fjsp_mul_v2r8(r21,vftabscale);
1916             itab_tmp         = _fjsp_dtox_v2r8(rt);
1917             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1918             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1919             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1920
1921             vfconv.i[0]     *= 4;
1922             vfconv.i[1]     *= 4;
1923
1924             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1925             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1926             F                = _fjsp_setzero_v2r8();
1927             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1928             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1929             H                = _fjsp_setzero_v2r8();
1930             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1931             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1932             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1933             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
1934
1935             fscal            = felec;
1936
1937             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1938
1939             /* Update vectorial force */
1940             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1941             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1942             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1943             
1944             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1945             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1946             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1947
1948             /**************************
1949              * CALCULATE INTERACTIONS *
1950              **************************/
1951
1952             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1953
1954             /* Calculate table index by multiplying r with table scale and truncate to integer */
1955             rt               = _fjsp_mul_v2r8(r22,vftabscale);
1956             itab_tmp         = _fjsp_dtox_v2r8(rt);
1957             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1958             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1959             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1960
1961             vfconv.i[0]     *= 4;
1962             vfconv.i[1]     *= 4;
1963
1964             /* CUBIC SPLINE TABLE ELECTROSTATICS */
1965             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1966             F                = _fjsp_setzero_v2r8();
1967             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1968             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
1969             H                = _fjsp_setzero_v2r8();
1970             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1971             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
1972             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
1973             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
1974
1975             fscal            = felec;
1976
1977             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1978
1979             /* Update vectorial force */
1980             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1981             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1982             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1983             
1984             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1985             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1986             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1987
1988             /**************************
1989              * CALCULATE INTERACTIONS *
1990              **************************/
1991
1992             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1993
1994             /* Calculate table index by multiplying r with table scale and truncate to integer */
1995             rt               = _fjsp_mul_v2r8(r23,vftabscale);
1996             itab_tmp         = _fjsp_dtox_v2r8(rt);
1997             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1998             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1999             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2000
2001             vfconv.i[0]     *= 4;
2002             vfconv.i[1]     *= 4;
2003
2004             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2005             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2006             F                = _fjsp_setzero_v2r8();
2007             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2008             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2009             H                = _fjsp_setzero_v2r8();
2010             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2011             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2012             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2013             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
2014
2015             fscal            = felec;
2016
2017             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2018
2019             /* Update vectorial force */
2020             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
2021             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
2022             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
2023             
2024             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
2025             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
2026             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
2027
2028             /**************************
2029              * CALCULATE INTERACTIONS *
2030              **************************/
2031
2032             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
2033
2034             /* Calculate table index by multiplying r with table scale and truncate to integer */
2035             rt               = _fjsp_mul_v2r8(r31,vftabscale);
2036             itab_tmp         = _fjsp_dtox_v2r8(rt);
2037             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2038             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2039             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2040
2041             vfconv.i[0]     *= 4;
2042             vfconv.i[1]     *= 4;
2043
2044             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2045             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2046             F                = _fjsp_setzero_v2r8();
2047             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2048             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2049             H                = _fjsp_setzero_v2r8();
2050             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2051             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2052             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2053             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
2054
2055             fscal            = felec;
2056
2057             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2058
2059             /* Update vectorial force */
2060             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
2061             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
2062             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
2063             
2064             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
2065             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
2066             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
2067
2068             /**************************
2069              * CALCULATE INTERACTIONS *
2070              **************************/
2071
2072             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
2073
2074             /* Calculate table index by multiplying r with table scale and truncate to integer */
2075             rt               = _fjsp_mul_v2r8(r32,vftabscale);
2076             itab_tmp         = _fjsp_dtox_v2r8(rt);
2077             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2078             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2079             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2080
2081             vfconv.i[0]     *= 4;
2082             vfconv.i[1]     *= 4;
2083
2084             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2085             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2086             F                = _fjsp_setzero_v2r8();
2087             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2088             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2089             H                = _fjsp_setzero_v2r8();
2090             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2091             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2092             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2093             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
2094
2095             fscal            = felec;
2096
2097             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2098
2099             /* Update vectorial force */
2100             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
2101             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
2102             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
2103             
2104             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
2105             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
2106             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
2107
2108             /**************************
2109              * CALCULATE INTERACTIONS *
2110              **************************/
2111
2112             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
2113
2114             /* Calculate table index by multiplying r with table scale and truncate to integer */
2115             rt               = _fjsp_mul_v2r8(r33,vftabscale);
2116             itab_tmp         = _fjsp_dtox_v2r8(rt);
2117             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
2118             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
2119             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
2120
2121             vfconv.i[0]     *= 4;
2122             vfconv.i[1]     *= 4;
2123
2124             /* CUBIC SPLINE TABLE ELECTROSTATICS */
2125             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
2126             F                = _fjsp_setzero_v2r8();
2127             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
2128             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
2129             H                = _fjsp_setzero_v2r8();
2130             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
2131             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
2132             FF               = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
2133             felec            = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
2134
2135             fscal            = felec;
2136
2137             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2138
2139             /* Update vectorial force */
2140             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
2141             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
2142             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
2143             
2144             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
2145             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
2146             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
2147
2148             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2149
2150             /* Inner loop uses 378 flops */
2151         }
2152
2153         /* End of innermost loop */
2154
2155         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2156                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
2157
2158         /* Increment number of inner iterations */
2159         inneriter                  += j_index_end - j_index_start;
2160
2161         /* Outer loop uses 18 flops */
2162     }
2163
2164     /* Increment number of outer iterations */
2165     outeriter        += nri;
2166
2167     /* Update outer/inner flops */
2168
2169     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*378);
2170 }