e5e93e45135a896bc816b989ecd63d2b4da00e42
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecEw_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "vec.h"
47 #include "nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: Ewald
54  * VdW interaction:            LennardJones
55  * Geometry:                   Water4-Water4
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwioffset3;
88     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
89     int              vdwjidx0A,vdwjidx0B;
90     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
91     int              vdwjidx1A,vdwjidx1B;
92     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
93     int              vdwjidx2A,vdwjidx2B;
94     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
95     int              vdwjidx3A,vdwjidx3B;
96     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
97     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
98     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
99     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
100     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
101     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
102     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
103     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
104     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
105     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
106     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
107     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
108     real             *charge;
109     int              nvdwtype;
110     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
111     int              *vdwtype;
112     real             *vdwparam;
113     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
114     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
115     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
116     real             *ewtab;
117     _fjsp_v2r8       itab_tmp;
118     _fjsp_v2r8       dummy_mask,cutoff_mask;
119     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
120     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
121     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
122
123     x                = xx[0];
124     f                = ff[0];
125
126     nri              = nlist->nri;
127     iinr             = nlist->iinr;
128     jindex           = nlist->jindex;
129     jjnr             = nlist->jjnr;
130     shiftidx         = nlist->shift;
131     gid              = nlist->gid;
132     shiftvec         = fr->shift_vec[0];
133     fshift           = fr->fshift[0];
134     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
135     charge           = mdatoms->chargeA;
136     nvdwtype         = fr->ntype;
137     vdwparam         = fr->nbfp;
138     vdwtype          = mdatoms->typeA;
139
140     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
141     ewtab            = fr->ic->tabq_coul_FDV0;
142     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
143     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
144
145     /* Setup water-specific parameters */
146     inr              = nlist->iinr[0];
147     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
148     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
149     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
150     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
151
152     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
153     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
154     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
155     vdwjidx0A        = 2*vdwtype[inr+0];
156     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
157     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
158     qq11             = _fjsp_mul_v2r8(iq1,jq1);
159     qq12             = _fjsp_mul_v2r8(iq1,jq2);
160     qq13             = _fjsp_mul_v2r8(iq1,jq3);
161     qq21             = _fjsp_mul_v2r8(iq2,jq1);
162     qq22             = _fjsp_mul_v2r8(iq2,jq2);
163     qq23             = _fjsp_mul_v2r8(iq2,jq3);
164     qq31             = _fjsp_mul_v2r8(iq3,jq1);
165     qq32             = _fjsp_mul_v2r8(iq3,jq2);
166     qq33             = _fjsp_mul_v2r8(iq3,jq3);
167
168     /* Avoid stupid compiler warnings */
169     jnrA = jnrB = 0;
170     j_coord_offsetA = 0;
171     j_coord_offsetB = 0;
172
173     outeriter        = 0;
174     inneriter        = 0;
175
176     /* Start outer loop over neighborlists */
177     for(iidx=0; iidx<nri; iidx++)
178     {
179         /* Load shift vector for this list */
180         i_shift_offset   = DIM*shiftidx[iidx];
181
182         /* Load limits for loop over neighbors */
183         j_index_start    = jindex[iidx];
184         j_index_end      = jindex[iidx+1];
185
186         /* Get outer coordinate index */
187         inr              = iinr[iidx];
188         i_coord_offset   = DIM*inr;
189
190         /* Load i particle coords and add shift vector */
191         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
192                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
193
194         fix0             = _fjsp_setzero_v2r8();
195         fiy0             = _fjsp_setzero_v2r8();
196         fiz0             = _fjsp_setzero_v2r8();
197         fix1             = _fjsp_setzero_v2r8();
198         fiy1             = _fjsp_setzero_v2r8();
199         fiz1             = _fjsp_setzero_v2r8();
200         fix2             = _fjsp_setzero_v2r8();
201         fiy2             = _fjsp_setzero_v2r8();
202         fiz2             = _fjsp_setzero_v2r8();
203         fix3             = _fjsp_setzero_v2r8();
204         fiy3             = _fjsp_setzero_v2r8();
205         fiz3             = _fjsp_setzero_v2r8();
206
207         /* Reset potential sums */
208         velecsum         = _fjsp_setzero_v2r8();
209         vvdwsum          = _fjsp_setzero_v2r8();
210
211         /* Start inner kernel loop */
212         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
213         {
214
215             /* Get j neighbor index, and coordinate index */
216             jnrA             = jjnr[jidx];
217             jnrB             = jjnr[jidx+1];
218             j_coord_offsetA  = DIM*jnrA;
219             j_coord_offsetB  = DIM*jnrB;
220
221             /* load j atom coordinates */
222             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
223                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
224                                               &jy2,&jz2,&jx3,&jy3,&jz3);
225
226             /* Calculate displacement vector */
227             dx00             = _fjsp_sub_v2r8(ix0,jx0);
228             dy00             = _fjsp_sub_v2r8(iy0,jy0);
229             dz00             = _fjsp_sub_v2r8(iz0,jz0);
230             dx11             = _fjsp_sub_v2r8(ix1,jx1);
231             dy11             = _fjsp_sub_v2r8(iy1,jy1);
232             dz11             = _fjsp_sub_v2r8(iz1,jz1);
233             dx12             = _fjsp_sub_v2r8(ix1,jx2);
234             dy12             = _fjsp_sub_v2r8(iy1,jy2);
235             dz12             = _fjsp_sub_v2r8(iz1,jz2);
236             dx13             = _fjsp_sub_v2r8(ix1,jx3);
237             dy13             = _fjsp_sub_v2r8(iy1,jy3);
238             dz13             = _fjsp_sub_v2r8(iz1,jz3);
239             dx21             = _fjsp_sub_v2r8(ix2,jx1);
240             dy21             = _fjsp_sub_v2r8(iy2,jy1);
241             dz21             = _fjsp_sub_v2r8(iz2,jz1);
242             dx22             = _fjsp_sub_v2r8(ix2,jx2);
243             dy22             = _fjsp_sub_v2r8(iy2,jy2);
244             dz22             = _fjsp_sub_v2r8(iz2,jz2);
245             dx23             = _fjsp_sub_v2r8(ix2,jx3);
246             dy23             = _fjsp_sub_v2r8(iy2,jy3);
247             dz23             = _fjsp_sub_v2r8(iz2,jz3);
248             dx31             = _fjsp_sub_v2r8(ix3,jx1);
249             dy31             = _fjsp_sub_v2r8(iy3,jy1);
250             dz31             = _fjsp_sub_v2r8(iz3,jz1);
251             dx32             = _fjsp_sub_v2r8(ix3,jx2);
252             dy32             = _fjsp_sub_v2r8(iy3,jy2);
253             dz32             = _fjsp_sub_v2r8(iz3,jz2);
254             dx33             = _fjsp_sub_v2r8(ix3,jx3);
255             dy33             = _fjsp_sub_v2r8(iy3,jy3);
256             dz33             = _fjsp_sub_v2r8(iz3,jz3);
257
258             /* Calculate squared distance and things based on it */
259             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
260             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
261             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
262             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
263             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
264             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
265             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
266             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
267             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
268             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
269
270             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
271             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
272             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
273             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
274             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
275             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
276             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
277             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
278             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
279
280             rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
281             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
282             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
283             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
284             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
285             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
286             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
287             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
288             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
289             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
290
291             fjx0             = _fjsp_setzero_v2r8();
292             fjy0             = _fjsp_setzero_v2r8();
293             fjz0             = _fjsp_setzero_v2r8();
294             fjx1             = _fjsp_setzero_v2r8();
295             fjy1             = _fjsp_setzero_v2r8();
296             fjz1             = _fjsp_setzero_v2r8();
297             fjx2             = _fjsp_setzero_v2r8();
298             fjy2             = _fjsp_setzero_v2r8();
299             fjz2             = _fjsp_setzero_v2r8();
300             fjx3             = _fjsp_setzero_v2r8();
301             fjy3             = _fjsp_setzero_v2r8();
302             fjz3             = _fjsp_setzero_v2r8();
303
304             /**************************
305              * CALCULATE INTERACTIONS *
306              **************************/
307
308             /* LENNARD-JONES DISPERSION/REPULSION */
309
310             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
311             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
312             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
313             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
314             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
315
316             /* Update potential sum for this i atom from the interaction with this j atom. */
317             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
318
319             fscal            = fvdw;
320
321             /* Update vectorial force */
322             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
323             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
324             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
325             
326             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
327             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
328             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
329
330             /**************************
331              * CALCULATE INTERACTIONS *
332              **************************/
333
334             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
335
336             /* EWALD ELECTROSTATICS */
337
338             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
339             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
340             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
341             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
342             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
343
344             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
345             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
346             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
347             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
348             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
349             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
350             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
351             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
352             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
353             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
354
355             /* Update potential sum for this i atom from the interaction with this j atom. */
356             velecsum         = _fjsp_add_v2r8(velecsum,velec);
357
358             fscal            = felec;
359
360             /* Update vectorial force */
361             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
362             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
363             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
364             
365             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
366             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
367             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
368
369             /**************************
370              * CALCULATE INTERACTIONS *
371              **************************/
372
373             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
374
375             /* EWALD ELECTROSTATICS */
376
377             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
378             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
379             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
380             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
381             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
382
383             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
384             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
385             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
386             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
387             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
388             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
389             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
390             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
391             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
392             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
393
394             /* Update potential sum for this i atom from the interaction with this j atom. */
395             velecsum         = _fjsp_add_v2r8(velecsum,velec);
396
397             fscal            = felec;
398
399             /* Update vectorial force */
400             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
401             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
402             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
403             
404             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
405             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
406             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
407
408             /**************************
409              * CALCULATE INTERACTIONS *
410              **************************/
411
412             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
413
414             /* EWALD ELECTROSTATICS */
415
416             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
417             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
418             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
419             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
420             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
421
422             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
423             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
424             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
425             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
426             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
427             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
428             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
429             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
430             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
431             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
432
433             /* Update potential sum for this i atom from the interaction with this j atom. */
434             velecsum         = _fjsp_add_v2r8(velecsum,velec);
435
436             fscal            = felec;
437
438             /* Update vectorial force */
439             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
440             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
441             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
442             
443             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
444             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
445             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
446
447             /**************************
448              * CALCULATE INTERACTIONS *
449              **************************/
450
451             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
452
453             /* EWALD ELECTROSTATICS */
454
455             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
456             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
457             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
458             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
459             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
460
461             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
462             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
463             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
464             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
465             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
466             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
467             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
468             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
469             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
470             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
471
472             /* Update potential sum for this i atom from the interaction with this j atom. */
473             velecsum         = _fjsp_add_v2r8(velecsum,velec);
474
475             fscal            = felec;
476
477             /* Update vectorial force */
478             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
479             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
480             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
481             
482             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
483             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
484             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
485
486             /**************************
487              * CALCULATE INTERACTIONS *
488              **************************/
489
490             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
491
492             /* EWALD ELECTROSTATICS */
493
494             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
495             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
496             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
497             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
498             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
499
500             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
501             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
502             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
503             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
504             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
505             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
506             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
507             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
508             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
509             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
510
511             /* Update potential sum for this i atom from the interaction with this j atom. */
512             velecsum         = _fjsp_add_v2r8(velecsum,velec);
513
514             fscal            = felec;
515
516             /* Update vectorial force */
517             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
518             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
519             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
520             
521             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
522             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
523             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
524
525             /**************************
526              * CALCULATE INTERACTIONS *
527              **************************/
528
529             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
530
531             /* EWALD ELECTROSTATICS */
532
533             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
534             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
535             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
536             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
537             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
538
539             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
540             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
541             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
542             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
543             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
544             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
545             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
546             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
547             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
548             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
549
550             /* Update potential sum for this i atom from the interaction with this j atom. */
551             velecsum         = _fjsp_add_v2r8(velecsum,velec);
552
553             fscal            = felec;
554
555             /* Update vectorial force */
556             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
557             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
558             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
559             
560             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
561             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
562             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
563
564             /**************************
565              * CALCULATE INTERACTIONS *
566              **************************/
567
568             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
569
570             /* EWALD ELECTROSTATICS */
571
572             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
573             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
574             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
575             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
576             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
577
578             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
579             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
580             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
581             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
582             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
583             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
584             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
585             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
586             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
587             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
588
589             /* Update potential sum for this i atom from the interaction with this j atom. */
590             velecsum         = _fjsp_add_v2r8(velecsum,velec);
591
592             fscal            = felec;
593
594             /* Update vectorial force */
595             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
596             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
597             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
598             
599             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
600             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
601             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
602
603             /**************************
604              * CALCULATE INTERACTIONS *
605              **************************/
606
607             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
608
609             /* EWALD ELECTROSTATICS */
610
611             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
612             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
613             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
614             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
615             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
616
617             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
618             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
619             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
620             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
621             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
622             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
623             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
624             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
625             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
626             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
627
628             /* Update potential sum for this i atom from the interaction with this j atom. */
629             velecsum         = _fjsp_add_v2r8(velecsum,velec);
630
631             fscal            = felec;
632
633             /* Update vectorial force */
634             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
635             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
636             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
637             
638             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
639             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
640             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
641
642             /**************************
643              * CALCULATE INTERACTIONS *
644              **************************/
645
646             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
647
648             /* EWALD ELECTROSTATICS */
649
650             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
651             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
652             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
653             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
654             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
655
656             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
657             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
658             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
659             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
660             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
661             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
662             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
663             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
664             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
665             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
666
667             /* Update potential sum for this i atom from the interaction with this j atom. */
668             velecsum         = _fjsp_add_v2r8(velecsum,velec);
669
670             fscal            = felec;
671
672             /* Update vectorial force */
673             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
674             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
675             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
676             
677             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
678             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
679             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
680
681             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
682
683             /* Inner loop uses 434 flops */
684         }
685
686         if(jidx<j_index_end)
687         {
688
689             jnrA             = jjnr[jidx];
690             j_coord_offsetA  = DIM*jnrA;
691
692             /* load j atom coordinates */
693             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
694                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
695                                               &jy2,&jz2,&jx3,&jy3,&jz3);
696
697             /* Calculate displacement vector */
698             dx00             = _fjsp_sub_v2r8(ix0,jx0);
699             dy00             = _fjsp_sub_v2r8(iy0,jy0);
700             dz00             = _fjsp_sub_v2r8(iz0,jz0);
701             dx11             = _fjsp_sub_v2r8(ix1,jx1);
702             dy11             = _fjsp_sub_v2r8(iy1,jy1);
703             dz11             = _fjsp_sub_v2r8(iz1,jz1);
704             dx12             = _fjsp_sub_v2r8(ix1,jx2);
705             dy12             = _fjsp_sub_v2r8(iy1,jy2);
706             dz12             = _fjsp_sub_v2r8(iz1,jz2);
707             dx13             = _fjsp_sub_v2r8(ix1,jx3);
708             dy13             = _fjsp_sub_v2r8(iy1,jy3);
709             dz13             = _fjsp_sub_v2r8(iz1,jz3);
710             dx21             = _fjsp_sub_v2r8(ix2,jx1);
711             dy21             = _fjsp_sub_v2r8(iy2,jy1);
712             dz21             = _fjsp_sub_v2r8(iz2,jz1);
713             dx22             = _fjsp_sub_v2r8(ix2,jx2);
714             dy22             = _fjsp_sub_v2r8(iy2,jy2);
715             dz22             = _fjsp_sub_v2r8(iz2,jz2);
716             dx23             = _fjsp_sub_v2r8(ix2,jx3);
717             dy23             = _fjsp_sub_v2r8(iy2,jy3);
718             dz23             = _fjsp_sub_v2r8(iz2,jz3);
719             dx31             = _fjsp_sub_v2r8(ix3,jx1);
720             dy31             = _fjsp_sub_v2r8(iy3,jy1);
721             dz31             = _fjsp_sub_v2r8(iz3,jz1);
722             dx32             = _fjsp_sub_v2r8(ix3,jx2);
723             dy32             = _fjsp_sub_v2r8(iy3,jy2);
724             dz32             = _fjsp_sub_v2r8(iz3,jz2);
725             dx33             = _fjsp_sub_v2r8(ix3,jx3);
726             dy33             = _fjsp_sub_v2r8(iy3,jy3);
727             dz33             = _fjsp_sub_v2r8(iz3,jz3);
728
729             /* Calculate squared distance and things based on it */
730             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
731             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
732             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
733             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
734             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
735             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
736             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
737             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
738             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
739             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
740
741             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
742             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
743             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
744             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
745             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
746             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
747             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
748             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
749             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
750
751             rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
752             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
753             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
754             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
755             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
756             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
757             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
758             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
759             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
760             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
761
762             fjx0             = _fjsp_setzero_v2r8();
763             fjy0             = _fjsp_setzero_v2r8();
764             fjz0             = _fjsp_setzero_v2r8();
765             fjx1             = _fjsp_setzero_v2r8();
766             fjy1             = _fjsp_setzero_v2r8();
767             fjz1             = _fjsp_setzero_v2r8();
768             fjx2             = _fjsp_setzero_v2r8();
769             fjy2             = _fjsp_setzero_v2r8();
770             fjz2             = _fjsp_setzero_v2r8();
771             fjx3             = _fjsp_setzero_v2r8();
772             fjy3             = _fjsp_setzero_v2r8();
773             fjz3             = _fjsp_setzero_v2r8();
774
775             /**************************
776              * CALCULATE INTERACTIONS *
777              **************************/
778
779             /* LENNARD-JONES DISPERSION/REPULSION */
780
781             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
782             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
783             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
784             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
785             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
786
787             /* Update potential sum for this i atom from the interaction with this j atom. */
788             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
789             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
790
791             fscal            = fvdw;
792
793             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
794
795             /* Update vectorial force */
796             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
797             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
798             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
799             
800             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
801             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
802             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
803
804             /**************************
805              * CALCULATE INTERACTIONS *
806              **************************/
807
808             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
809
810             /* EWALD ELECTROSTATICS */
811
812             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
813             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
814             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
815             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
816             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
817
818             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
819             ewtabD           = _fjsp_setzero_v2r8();
820             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
821             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
822             ewtabFn          = _fjsp_setzero_v2r8();
823             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
824             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
825             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
826             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
827             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
828
829             /* Update potential sum for this i atom from the interaction with this j atom. */
830             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
831             velecsum         = _fjsp_add_v2r8(velecsum,velec);
832
833             fscal            = felec;
834
835             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
836
837             /* Update vectorial force */
838             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
839             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
840             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
841             
842             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
843             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
844             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
845
846             /**************************
847              * CALCULATE INTERACTIONS *
848              **************************/
849
850             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
851
852             /* EWALD ELECTROSTATICS */
853
854             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
855             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
856             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
857             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
858             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
859
860             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
861             ewtabD           = _fjsp_setzero_v2r8();
862             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
863             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
864             ewtabFn          = _fjsp_setzero_v2r8();
865             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
866             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
867             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
868             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
869             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
870
871             /* Update potential sum for this i atom from the interaction with this j atom. */
872             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
873             velecsum         = _fjsp_add_v2r8(velecsum,velec);
874
875             fscal            = felec;
876
877             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
878
879             /* Update vectorial force */
880             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
881             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
882             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
883             
884             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
885             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
886             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
887
888             /**************************
889              * CALCULATE INTERACTIONS *
890              **************************/
891
892             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
893
894             /* EWALD ELECTROSTATICS */
895
896             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
897             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
898             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
899             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
900             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
901
902             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
903             ewtabD           = _fjsp_setzero_v2r8();
904             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
905             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
906             ewtabFn          = _fjsp_setzero_v2r8();
907             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
908             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
909             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
910             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
911             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
912
913             /* Update potential sum for this i atom from the interaction with this j atom. */
914             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
915             velecsum         = _fjsp_add_v2r8(velecsum,velec);
916
917             fscal            = felec;
918
919             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
920
921             /* Update vectorial force */
922             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
923             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
924             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
925             
926             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
927             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
928             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
929
930             /**************************
931              * CALCULATE INTERACTIONS *
932              **************************/
933
934             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
935
936             /* EWALD ELECTROSTATICS */
937
938             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
939             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
940             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
941             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
942             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
943
944             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
945             ewtabD           = _fjsp_setzero_v2r8();
946             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
947             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
948             ewtabFn          = _fjsp_setzero_v2r8();
949             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
950             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
951             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
952             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
953             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
954
955             /* Update potential sum for this i atom from the interaction with this j atom. */
956             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
957             velecsum         = _fjsp_add_v2r8(velecsum,velec);
958
959             fscal            = felec;
960
961             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
962
963             /* Update vectorial force */
964             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
965             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
966             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
967             
968             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
969             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
970             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
971
972             /**************************
973              * CALCULATE INTERACTIONS *
974              **************************/
975
976             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
977
978             /* EWALD ELECTROSTATICS */
979
980             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
981             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
982             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
983             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
984             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
985
986             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
987             ewtabD           = _fjsp_setzero_v2r8();
988             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
989             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
990             ewtabFn          = _fjsp_setzero_v2r8();
991             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
992             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
993             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
994             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
995             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
996
997             /* Update potential sum for this i atom from the interaction with this j atom. */
998             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
999             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1000
1001             fscal            = felec;
1002
1003             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1004
1005             /* Update vectorial force */
1006             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1007             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1008             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1009             
1010             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1011             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1012             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1013
1014             /**************************
1015              * CALCULATE INTERACTIONS *
1016              **************************/
1017
1018             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1019
1020             /* EWALD ELECTROSTATICS */
1021
1022             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1023             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
1024             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1025             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1026             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1027
1028             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1029             ewtabD           = _fjsp_setzero_v2r8();
1030             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1031             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1032             ewtabFn          = _fjsp_setzero_v2r8();
1033             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1034             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1035             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1036             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
1037             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
1038
1039             /* Update potential sum for this i atom from the interaction with this j atom. */
1040             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1041             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1042
1043             fscal            = felec;
1044
1045             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1046
1047             /* Update vectorial force */
1048             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1049             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1050             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1051             
1052             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1053             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1054             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1055
1056             /**************************
1057              * CALCULATE INTERACTIONS *
1058              **************************/
1059
1060             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1061
1062             /* EWALD ELECTROSTATICS */
1063
1064             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1065             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
1066             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1067             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1068             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1069
1070             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1071             ewtabD           = _fjsp_setzero_v2r8();
1072             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1073             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1074             ewtabFn          = _fjsp_setzero_v2r8();
1075             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1076             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1077             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1078             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
1079             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
1080
1081             /* Update potential sum for this i atom from the interaction with this j atom. */
1082             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1083             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1084
1085             fscal            = felec;
1086
1087             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1088
1089             /* Update vectorial force */
1090             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1091             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1092             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1093             
1094             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1095             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1096             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1097
1098             /**************************
1099              * CALCULATE INTERACTIONS *
1100              **************************/
1101
1102             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1103
1104             /* EWALD ELECTROSTATICS */
1105
1106             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1107             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
1108             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1109             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1110             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1111
1112             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1113             ewtabD           = _fjsp_setzero_v2r8();
1114             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1115             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1116             ewtabFn          = _fjsp_setzero_v2r8();
1117             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1118             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1119             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1120             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
1121             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
1122
1123             /* Update potential sum for this i atom from the interaction with this j atom. */
1124             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1125             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1126
1127             fscal            = felec;
1128
1129             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1130
1131             /* Update vectorial force */
1132             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1133             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1134             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1135             
1136             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1137             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1138             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1139
1140             /**************************
1141              * CALCULATE INTERACTIONS *
1142              **************************/
1143
1144             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1145
1146             /* EWALD ELECTROSTATICS */
1147
1148             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1149             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
1150             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1151             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1152             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1153
1154             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1155             ewtabD           = _fjsp_setzero_v2r8();
1156             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1157             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1158             ewtabFn          = _fjsp_setzero_v2r8();
1159             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1160             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1161             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1162             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
1163             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
1164
1165             /* Update potential sum for this i atom from the interaction with this j atom. */
1166             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1167             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1168
1169             fscal            = felec;
1170
1171             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1172
1173             /* Update vectorial force */
1174             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1175             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1176             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1177             
1178             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1179             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1180             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1181
1182             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1183
1184             /* Inner loop uses 434 flops */
1185         }
1186
1187         /* End of innermost loop */
1188
1189         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1190                                               f+i_coord_offset,fshift+i_shift_offset);
1191
1192         ggid                        = gid[iidx];
1193         /* Update potential energies */
1194         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1195         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1196
1197         /* Increment number of inner iterations */
1198         inneriter                  += j_index_end - j_index_start;
1199
1200         /* Outer loop uses 26 flops */
1201     }
1202
1203     /* Increment number of outer iterations */
1204     outeriter        += nri;
1205
1206     /* Update outer/inner flops */
1207
1208     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*434);
1209 }
1210 /*
1211  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
1212  * Electrostatics interaction: Ewald
1213  * VdW interaction:            LennardJones
1214  * Geometry:                   Water4-Water4
1215  * Calculate force/pot:        Force
1216  */
1217 void
1218 nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
1219                     (t_nblist                    * gmx_restrict       nlist,
1220                      rvec                        * gmx_restrict          xx,
1221                      rvec                        * gmx_restrict          ff,
1222                      t_forcerec                  * gmx_restrict          fr,
1223                      t_mdatoms                   * gmx_restrict     mdatoms,
1224                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1225                      t_nrnb                      * gmx_restrict        nrnb)
1226 {
1227     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1228      * just 0 for non-waters.
1229      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1230      * jnr indices corresponding to data put in the two positions in the SIMD register.
1231      */
1232     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1233     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1234     int              jnrA,jnrB;
1235     int              j_coord_offsetA,j_coord_offsetB;
1236     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1237     real             rcutoff_scalar;
1238     real             *shiftvec,*fshift,*x,*f;
1239     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1240     int              vdwioffset0;
1241     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1242     int              vdwioffset1;
1243     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1244     int              vdwioffset2;
1245     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1246     int              vdwioffset3;
1247     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1248     int              vdwjidx0A,vdwjidx0B;
1249     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1250     int              vdwjidx1A,vdwjidx1B;
1251     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1252     int              vdwjidx2A,vdwjidx2B;
1253     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1254     int              vdwjidx3A,vdwjidx3B;
1255     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1256     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1257     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1258     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1259     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1260     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1261     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1262     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1263     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1264     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1265     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1266     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1267     real             *charge;
1268     int              nvdwtype;
1269     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1270     int              *vdwtype;
1271     real             *vdwparam;
1272     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1273     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1274     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1275     real             *ewtab;
1276     _fjsp_v2r8       itab_tmp;
1277     _fjsp_v2r8       dummy_mask,cutoff_mask;
1278     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1279     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1280     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1281
1282     x                = xx[0];
1283     f                = ff[0];
1284
1285     nri              = nlist->nri;
1286     iinr             = nlist->iinr;
1287     jindex           = nlist->jindex;
1288     jjnr             = nlist->jjnr;
1289     shiftidx         = nlist->shift;
1290     gid              = nlist->gid;
1291     shiftvec         = fr->shift_vec[0];
1292     fshift           = fr->fshift[0];
1293     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1294     charge           = mdatoms->chargeA;
1295     nvdwtype         = fr->ntype;
1296     vdwparam         = fr->nbfp;
1297     vdwtype          = mdatoms->typeA;
1298
1299     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1300     ewtab            = fr->ic->tabq_coul_F;
1301     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1302     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1303
1304     /* Setup water-specific parameters */
1305     inr              = nlist->iinr[0];
1306     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1307     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1308     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1309     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1310
1311     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1312     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1313     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
1314     vdwjidx0A        = 2*vdwtype[inr+0];
1315     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1316     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1317     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1318     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1319     qq13             = _fjsp_mul_v2r8(iq1,jq3);
1320     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1321     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1322     qq23             = _fjsp_mul_v2r8(iq2,jq3);
1323     qq31             = _fjsp_mul_v2r8(iq3,jq1);
1324     qq32             = _fjsp_mul_v2r8(iq3,jq2);
1325     qq33             = _fjsp_mul_v2r8(iq3,jq3);
1326
1327     /* Avoid stupid compiler warnings */
1328     jnrA = jnrB = 0;
1329     j_coord_offsetA = 0;
1330     j_coord_offsetB = 0;
1331
1332     outeriter        = 0;
1333     inneriter        = 0;
1334
1335     /* Start outer loop over neighborlists */
1336     for(iidx=0; iidx<nri; iidx++)
1337     {
1338         /* Load shift vector for this list */
1339         i_shift_offset   = DIM*shiftidx[iidx];
1340
1341         /* Load limits for loop over neighbors */
1342         j_index_start    = jindex[iidx];
1343         j_index_end      = jindex[iidx+1];
1344
1345         /* Get outer coordinate index */
1346         inr              = iinr[iidx];
1347         i_coord_offset   = DIM*inr;
1348
1349         /* Load i particle coords and add shift vector */
1350         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1351                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1352
1353         fix0             = _fjsp_setzero_v2r8();
1354         fiy0             = _fjsp_setzero_v2r8();
1355         fiz0             = _fjsp_setzero_v2r8();
1356         fix1             = _fjsp_setzero_v2r8();
1357         fiy1             = _fjsp_setzero_v2r8();
1358         fiz1             = _fjsp_setzero_v2r8();
1359         fix2             = _fjsp_setzero_v2r8();
1360         fiy2             = _fjsp_setzero_v2r8();
1361         fiz2             = _fjsp_setzero_v2r8();
1362         fix3             = _fjsp_setzero_v2r8();
1363         fiy3             = _fjsp_setzero_v2r8();
1364         fiz3             = _fjsp_setzero_v2r8();
1365
1366         /* Start inner kernel loop */
1367         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1368         {
1369
1370             /* Get j neighbor index, and coordinate index */
1371             jnrA             = jjnr[jidx];
1372             jnrB             = jjnr[jidx+1];
1373             j_coord_offsetA  = DIM*jnrA;
1374             j_coord_offsetB  = DIM*jnrB;
1375
1376             /* load j atom coordinates */
1377             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1378                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1379                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1380
1381             /* Calculate displacement vector */
1382             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1383             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1384             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1385             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1386             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1387             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1388             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1389             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1390             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1391             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1392             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1393             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1394             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1395             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1396             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1397             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1398             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1399             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1400             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1401             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1402             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1403             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1404             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1405             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1406             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1407             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1408             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1409             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1410             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1411             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1412
1413             /* Calculate squared distance and things based on it */
1414             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1415             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1416             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1417             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1418             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1419             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1420             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1421             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1422             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1423             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1424
1425             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1426             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1427             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1428             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1429             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1430             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1431             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1432             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1433             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1434
1435             rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
1436             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1437             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1438             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1439             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1440             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1441             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1442             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1443             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1444             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1445
1446             fjx0             = _fjsp_setzero_v2r8();
1447             fjy0             = _fjsp_setzero_v2r8();
1448             fjz0             = _fjsp_setzero_v2r8();
1449             fjx1             = _fjsp_setzero_v2r8();
1450             fjy1             = _fjsp_setzero_v2r8();
1451             fjz1             = _fjsp_setzero_v2r8();
1452             fjx2             = _fjsp_setzero_v2r8();
1453             fjy2             = _fjsp_setzero_v2r8();
1454             fjz2             = _fjsp_setzero_v2r8();
1455             fjx3             = _fjsp_setzero_v2r8();
1456             fjy3             = _fjsp_setzero_v2r8();
1457             fjz3             = _fjsp_setzero_v2r8();
1458
1459             /**************************
1460              * CALCULATE INTERACTIONS *
1461              **************************/
1462
1463             /* LENNARD-JONES DISPERSION/REPULSION */
1464
1465             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1466             fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1467
1468             fscal            = fvdw;
1469
1470             /* Update vectorial force */
1471             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1472             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1473             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1474             
1475             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1476             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1477             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1478
1479             /**************************
1480              * CALCULATE INTERACTIONS *
1481              **************************/
1482
1483             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1484
1485             /* EWALD ELECTROSTATICS */
1486
1487             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1488             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1489             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1490             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1491             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1492
1493             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1494                                          &ewtabF,&ewtabFn);
1495             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1496             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1497
1498             fscal            = felec;
1499
1500             /* Update vectorial force */
1501             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1502             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1503             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1504             
1505             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1506             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1507             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1508
1509             /**************************
1510              * CALCULATE INTERACTIONS *
1511              **************************/
1512
1513             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1514
1515             /* EWALD ELECTROSTATICS */
1516
1517             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1518             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1519             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1520             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1521             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1522
1523             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1524                                          &ewtabF,&ewtabFn);
1525             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1526             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1527
1528             fscal            = felec;
1529
1530             /* Update vectorial force */
1531             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1532             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1533             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1534             
1535             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1536             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1537             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1538
1539             /**************************
1540              * CALCULATE INTERACTIONS *
1541              **************************/
1542
1543             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
1544
1545             /* EWALD ELECTROSTATICS */
1546
1547             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1548             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
1549             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1550             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1551             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1552
1553             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1554                                          &ewtabF,&ewtabFn);
1555             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1556             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
1557
1558             fscal            = felec;
1559
1560             /* Update vectorial force */
1561             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1562             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1563             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1564             
1565             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1566             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1567             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1568
1569             /**************************
1570              * CALCULATE INTERACTIONS *
1571              **************************/
1572
1573             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1574
1575             /* EWALD ELECTROSTATICS */
1576
1577             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1578             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1579             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1580             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1581             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1582
1583             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1584                                          &ewtabF,&ewtabFn);
1585             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1586             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1587
1588             fscal            = felec;
1589
1590             /* Update vectorial force */
1591             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1592             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1593             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1594             
1595             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1596             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1597             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1598
1599             /**************************
1600              * CALCULATE INTERACTIONS *
1601              **************************/
1602
1603             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1604
1605             /* EWALD ELECTROSTATICS */
1606
1607             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1608             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1609             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1610             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1611             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1612
1613             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1614                                          &ewtabF,&ewtabFn);
1615             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1616             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1617
1618             fscal            = felec;
1619
1620             /* Update vectorial force */
1621             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1622             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1623             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1624             
1625             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1626             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1627             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1628
1629             /**************************
1630              * CALCULATE INTERACTIONS *
1631              **************************/
1632
1633             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1634
1635             /* EWALD ELECTROSTATICS */
1636
1637             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1638             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
1639             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1640             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1641             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1642
1643             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1644                                          &ewtabF,&ewtabFn);
1645             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1646             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
1647
1648             fscal            = felec;
1649
1650             /* Update vectorial force */
1651             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1652             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1653             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1654             
1655             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1656             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1657             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1658
1659             /**************************
1660              * CALCULATE INTERACTIONS *
1661              **************************/
1662
1663             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1664
1665             /* EWALD ELECTROSTATICS */
1666
1667             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1668             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
1669             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1670             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1671             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1672
1673             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1674                                          &ewtabF,&ewtabFn);
1675             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1676             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
1677
1678             fscal            = felec;
1679
1680             /* Update vectorial force */
1681             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1682             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1683             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1684             
1685             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1686             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1687             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1688
1689             /**************************
1690              * CALCULATE INTERACTIONS *
1691              **************************/
1692
1693             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1694
1695             /* EWALD ELECTROSTATICS */
1696
1697             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1698             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
1699             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1700             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1701             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1702
1703             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1704                                          &ewtabF,&ewtabFn);
1705             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1706             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
1707
1708             fscal            = felec;
1709
1710             /* Update vectorial force */
1711             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1712             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1713             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1714             
1715             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1716             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1717             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1718
1719             /**************************
1720              * CALCULATE INTERACTIONS *
1721              **************************/
1722
1723             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1724
1725             /* EWALD ELECTROSTATICS */
1726
1727             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1728             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
1729             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1730             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1731             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1732
1733             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1734                                          &ewtabF,&ewtabFn);
1735             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1736             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
1737
1738             fscal            = felec;
1739
1740             /* Update vectorial force */
1741             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1742             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1743             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1744             
1745             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1746             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1747             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1748
1749             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1750
1751             /* Inner loop uses 384 flops */
1752         }
1753
1754         if(jidx<j_index_end)
1755         {
1756
1757             jnrA             = jjnr[jidx];
1758             j_coord_offsetA  = DIM*jnrA;
1759
1760             /* load j atom coordinates */
1761             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1762                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1763                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1764
1765             /* Calculate displacement vector */
1766             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1767             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1768             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1769             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1770             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1771             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1772             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1773             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1774             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1775             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1776             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1777             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1778             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1779             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1780             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1781             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1782             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1783             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1784             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1785             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1786             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1787             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1788             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1789             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1790             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1791             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1792             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1793             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1794             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1795             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1796
1797             /* Calculate squared distance and things based on it */
1798             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1799             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1800             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1801             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1802             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1803             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1804             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1805             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1806             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1807             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1808
1809             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1810             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1811             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1812             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1813             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1814             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1815             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1816             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1817             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1818
1819             rinvsq00         = gmx_fjsp_inv_v2r8(rsq00);
1820             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1821             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1822             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1823             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1824             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1825             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1826             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1827             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1828             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1829
1830             fjx0             = _fjsp_setzero_v2r8();
1831             fjy0             = _fjsp_setzero_v2r8();
1832             fjz0             = _fjsp_setzero_v2r8();
1833             fjx1             = _fjsp_setzero_v2r8();
1834             fjy1             = _fjsp_setzero_v2r8();
1835             fjz1             = _fjsp_setzero_v2r8();
1836             fjx2             = _fjsp_setzero_v2r8();
1837             fjy2             = _fjsp_setzero_v2r8();
1838             fjz2             = _fjsp_setzero_v2r8();
1839             fjx3             = _fjsp_setzero_v2r8();
1840             fjy3             = _fjsp_setzero_v2r8();
1841             fjz3             = _fjsp_setzero_v2r8();
1842
1843             /**************************
1844              * CALCULATE INTERACTIONS *
1845              **************************/
1846
1847             /* LENNARD-JONES DISPERSION/REPULSION */
1848
1849             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1850             fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1851
1852             fscal            = fvdw;
1853
1854             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1855
1856             /* Update vectorial force */
1857             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1858             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1859             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1860             
1861             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1862             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1863             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1864
1865             /**************************
1866              * CALCULATE INTERACTIONS *
1867              **************************/
1868
1869             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1870
1871             /* EWALD ELECTROSTATICS */
1872
1873             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1874             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1875             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1876             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1877             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1878
1879             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1880             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1881             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1882
1883             fscal            = felec;
1884
1885             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1886
1887             /* Update vectorial force */
1888             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1889             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1890             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1891             
1892             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1893             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1894             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1895
1896             /**************************
1897              * CALCULATE INTERACTIONS *
1898              **************************/
1899
1900             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1901
1902             /* EWALD ELECTROSTATICS */
1903
1904             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1905             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1906             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1907             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1908             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1909
1910             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1911             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1912             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1913
1914             fscal            = felec;
1915
1916             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1917
1918             /* Update vectorial force */
1919             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1920             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1921             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1922             
1923             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1924             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1925             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1926
1927             /**************************
1928              * CALCULATE INTERACTIONS *
1929              **************************/
1930
1931             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
1932
1933             /* EWALD ELECTROSTATICS */
1934
1935             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1936             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
1937             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1938             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1939             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1940
1941             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1942             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1943             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
1944
1945             fscal            = felec;
1946
1947             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1948
1949             /* Update vectorial force */
1950             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1951             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1952             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1953             
1954             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1955             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1956             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1957
1958             /**************************
1959              * CALCULATE INTERACTIONS *
1960              **************************/
1961
1962             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1963
1964             /* EWALD ELECTROSTATICS */
1965
1966             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1967             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1968             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1969             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1970             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1971
1972             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1973             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1974             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1975
1976             fscal            = felec;
1977
1978             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1979
1980             /* Update vectorial force */
1981             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1982             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1983             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1984             
1985             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1986             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1987             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1988
1989             /**************************
1990              * CALCULATE INTERACTIONS *
1991              **************************/
1992
1993             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1994
1995             /* EWALD ELECTROSTATICS */
1996
1997             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1998             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1999             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2000             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2001             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2002
2003             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2004             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2005             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2006
2007             fscal            = felec;
2008
2009             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2010
2011             /* Update vectorial force */
2012             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
2013             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2014             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2015             
2016             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2017             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2018             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2019
2020             /**************************
2021              * CALCULATE INTERACTIONS *
2022              **************************/
2023
2024             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
2025
2026             /* EWALD ELECTROSTATICS */
2027
2028             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2029             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
2030             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2031             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2032             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2033
2034             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2035             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2036             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
2037
2038             fscal            = felec;
2039
2040             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2041
2042             /* Update vectorial force */
2043             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
2044             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
2045             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
2046             
2047             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
2048             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
2049             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
2050
2051             /**************************
2052              * CALCULATE INTERACTIONS *
2053              **************************/
2054
2055             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
2056
2057             /* EWALD ELECTROSTATICS */
2058
2059             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2060             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
2061             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2062             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2063             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2064
2065             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2066             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2067             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
2068
2069             fscal            = felec;
2070
2071             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2072
2073             /* Update vectorial force */
2074             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
2075             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
2076             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
2077             
2078             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
2079             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
2080             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
2081
2082             /**************************
2083              * CALCULATE INTERACTIONS *
2084              **************************/
2085
2086             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
2087
2088             /* EWALD ELECTROSTATICS */
2089
2090             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2091             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
2092             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2093             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2094             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2095
2096             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2097             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2098             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
2099
2100             fscal            = felec;
2101
2102             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2103
2104             /* Update vectorial force */
2105             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
2106             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
2107             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
2108             
2109             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
2110             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
2111             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
2112
2113             /**************************
2114              * CALCULATE INTERACTIONS *
2115              **************************/
2116
2117             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
2118
2119             /* EWALD ELECTROSTATICS */
2120
2121             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2122             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
2123             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2124             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2125             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2126
2127             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2128             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2129             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
2130
2131             fscal            = felec;
2132
2133             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2134
2135             /* Update vectorial force */
2136             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
2137             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
2138             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
2139             
2140             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
2141             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
2142             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
2143
2144             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2145
2146             /* Inner loop uses 384 flops */
2147         }
2148
2149         /* End of innermost loop */
2150
2151         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2152                                               f+i_coord_offset,fshift+i_shift_offset);
2153
2154         /* Increment number of inner iterations */
2155         inneriter                  += j_index_end - j_index_start;
2156
2157         /* Outer loop uses 24 flops */
2158     }
2159
2160     /* Increment number of outer iterations */
2161     outeriter        += nri;
2162
2163     /* Update outer/inner flops */
2164
2165     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*384);
2166 }