Use full path for legacyheaders
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecEw_VdwNone_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: Ewald
52  * VdW interaction:            None
53  * Geometry:                   Water4-Water4
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset1;
80     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
81     int              vdwioffset2;
82     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
83     int              vdwioffset3;
84     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
85     int              vdwjidx1A,vdwjidx1B;
86     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
87     int              vdwjidx2A,vdwjidx2B;
88     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
89     int              vdwjidx3A,vdwjidx3B;
90     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
91     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
92     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
93     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
94     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
95     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
96     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
97     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
98     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
99     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
103     real             *ewtab;
104     _fjsp_v2r8       itab_tmp;
105     _fjsp_v2r8       dummy_mask,cutoff_mask;
106     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
107     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
108     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
109
110     x                = xx[0];
111     f                = ff[0];
112
113     nri              = nlist->nri;
114     iinr             = nlist->iinr;
115     jindex           = nlist->jindex;
116     jjnr             = nlist->jjnr;
117     shiftidx         = nlist->shift;
118     gid              = nlist->gid;
119     shiftvec         = fr->shift_vec[0];
120     fshift           = fr->fshift[0];
121     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
122     charge           = mdatoms->chargeA;
123
124     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
125     ewtab            = fr->ic->tabq_coul_FDV0;
126     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
127     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
128
129     /* Setup water-specific parameters */
130     inr              = nlist->iinr[0];
131     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
132     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
133     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
134
135     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
136     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
137     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
138     qq11             = _fjsp_mul_v2r8(iq1,jq1);
139     qq12             = _fjsp_mul_v2r8(iq1,jq2);
140     qq13             = _fjsp_mul_v2r8(iq1,jq3);
141     qq21             = _fjsp_mul_v2r8(iq2,jq1);
142     qq22             = _fjsp_mul_v2r8(iq2,jq2);
143     qq23             = _fjsp_mul_v2r8(iq2,jq3);
144     qq31             = _fjsp_mul_v2r8(iq3,jq1);
145     qq32             = _fjsp_mul_v2r8(iq3,jq2);
146     qq33             = _fjsp_mul_v2r8(iq3,jq3);
147
148     /* Avoid stupid compiler warnings */
149     jnrA = jnrB = 0;
150     j_coord_offsetA = 0;
151     j_coord_offsetB = 0;
152
153     outeriter        = 0;
154     inneriter        = 0;
155
156     /* Start outer loop over neighborlists */
157     for(iidx=0; iidx<nri; iidx++)
158     {
159         /* Load shift vector for this list */
160         i_shift_offset   = DIM*shiftidx[iidx];
161
162         /* Load limits for loop over neighbors */
163         j_index_start    = jindex[iidx];
164         j_index_end      = jindex[iidx+1];
165
166         /* Get outer coordinate index */
167         inr              = iinr[iidx];
168         i_coord_offset   = DIM*inr;
169
170         /* Load i particle coords and add shift vector */
171         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
172                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
173
174         fix1             = _fjsp_setzero_v2r8();
175         fiy1             = _fjsp_setzero_v2r8();
176         fiz1             = _fjsp_setzero_v2r8();
177         fix2             = _fjsp_setzero_v2r8();
178         fiy2             = _fjsp_setzero_v2r8();
179         fiz2             = _fjsp_setzero_v2r8();
180         fix3             = _fjsp_setzero_v2r8();
181         fiy3             = _fjsp_setzero_v2r8();
182         fiz3             = _fjsp_setzero_v2r8();
183
184         /* Reset potential sums */
185         velecsum         = _fjsp_setzero_v2r8();
186
187         /* Start inner kernel loop */
188         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
189         {
190
191             /* Get j neighbor index, and coordinate index */
192             jnrA             = jjnr[jidx];
193             jnrB             = jjnr[jidx+1];
194             j_coord_offsetA  = DIM*jnrA;
195             j_coord_offsetB  = DIM*jnrB;
196
197             /* load j atom coordinates */
198             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
199                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
200
201             /* Calculate displacement vector */
202             dx11             = _fjsp_sub_v2r8(ix1,jx1);
203             dy11             = _fjsp_sub_v2r8(iy1,jy1);
204             dz11             = _fjsp_sub_v2r8(iz1,jz1);
205             dx12             = _fjsp_sub_v2r8(ix1,jx2);
206             dy12             = _fjsp_sub_v2r8(iy1,jy2);
207             dz12             = _fjsp_sub_v2r8(iz1,jz2);
208             dx13             = _fjsp_sub_v2r8(ix1,jx3);
209             dy13             = _fjsp_sub_v2r8(iy1,jy3);
210             dz13             = _fjsp_sub_v2r8(iz1,jz3);
211             dx21             = _fjsp_sub_v2r8(ix2,jx1);
212             dy21             = _fjsp_sub_v2r8(iy2,jy1);
213             dz21             = _fjsp_sub_v2r8(iz2,jz1);
214             dx22             = _fjsp_sub_v2r8(ix2,jx2);
215             dy22             = _fjsp_sub_v2r8(iy2,jy2);
216             dz22             = _fjsp_sub_v2r8(iz2,jz2);
217             dx23             = _fjsp_sub_v2r8(ix2,jx3);
218             dy23             = _fjsp_sub_v2r8(iy2,jy3);
219             dz23             = _fjsp_sub_v2r8(iz2,jz3);
220             dx31             = _fjsp_sub_v2r8(ix3,jx1);
221             dy31             = _fjsp_sub_v2r8(iy3,jy1);
222             dz31             = _fjsp_sub_v2r8(iz3,jz1);
223             dx32             = _fjsp_sub_v2r8(ix3,jx2);
224             dy32             = _fjsp_sub_v2r8(iy3,jy2);
225             dz32             = _fjsp_sub_v2r8(iz3,jz2);
226             dx33             = _fjsp_sub_v2r8(ix3,jx3);
227             dy33             = _fjsp_sub_v2r8(iy3,jy3);
228             dz33             = _fjsp_sub_v2r8(iz3,jz3);
229
230             /* Calculate squared distance and things based on it */
231             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
232             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
233             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
234             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
235             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
236             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
237             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
238             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
239             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
240
241             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
242             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
243             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
244             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
245             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
246             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
247             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
248             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
249             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
250
251             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
252             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
253             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
254             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
255             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
256             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
257             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
258             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
259             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
260
261             fjx1             = _fjsp_setzero_v2r8();
262             fjy1             = _fjsp_setzero_v2r8();
263             fjz1             = _fjsp_setzero_v2r8();
264             fjx2             = _fjsp_setzero_v2r8();
265             fjy2             = _fjsp_setzero_v2r8();
266             fjz2             = _fjsp_setzero_v2r8();
267             fjx3             = _fjsp_setzero_v2r8();
268             fjy3             = _fjsp_setzero_v2r8();
269             fjz3             = _fjsp_setzero_v2r8();
270
271             /**************************
272              * CALCULATE INTERACTIONS *
273              **************************/
274
275             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
276
277             /* EWALD ELECTROSTATICS */
278
279             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
280             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
281             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
282             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
283             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
284
285             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
286             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
287             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
288             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
289             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
290             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
291             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
292             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
293             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
294             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
295
296             /* Update potential sum for this i atom from the interaction with this j atom. */
297             velecsum         = _fjsp_add_v2r8(velecsum,velec);
298
299             fscal            = felec;
300
301             /* Update vectorial force */
302             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
303             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
304             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
305             
306             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
307             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
308             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
309
310             /**************************
311              * CALCULATE INTERACTIONS *
312              **************************/
313
314             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
315
316             /* EWALD ELECTROSTATICS */
317
318             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
319             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
320             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
321             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
322             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
323
324             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
325             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
326             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
327             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
328             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
329             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
330             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
331             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
332             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
333             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
334
335             /* Update potential sum for this i atom from the interaction with this j atom. */
336             velecsum         = _fjsp_add_v2r8(velecsum,velec);
337
338             fscal            = felec;
339
340             /* Update vectorial force */
341             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
342             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
343             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
344             
345             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
346             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
347             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
348
349             /**************************
350              * CALCULATE INTERACTIONS *
351              **************************/
352
353             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
354
355             /* EWALD ELECTROSTATICS */
356
357             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
358             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
359             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
360             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
361             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
362
363             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
364             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
365             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
366             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
367             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
368             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
369             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
370             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
371             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
372             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
373
374             /* Update potential sum for this i atom from the interaction with this j atom. */
375             velecsum         = _fjsp_add_v2r8(velecsum,velec);
376
377             fscal            = felec;
378
379             /* Update vectorial force */
380             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
381             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
382             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
383             
384             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
385             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
386             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
387
388             /**************************
389              * CALCULATE INTERACTIONS *
390              **************************/
391
392             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
393
394             /* EWALD ELECTROSTATICS */
395
396             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
397             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
398             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
399             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
400             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
401
402             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
403             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
404             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
405             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
406             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
407             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
408             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
409             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
410             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
411             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
412
413             /* Update potential sum for this i atom from the interaction with this j atom. */
414             velecsum         = _fjsp_add_v2r8(velecsum,velec);
415
416             fscal            = felec;
417
418             /* Update vectorial force */
419             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
420             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
421             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
422             
423             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
424             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
425             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
426
427             /**************************
428              * CALCULATE INTERACTIONS *
429              **************************/
430
431             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
432
433             /* EWALD ELECTROSTATICS */
434
435             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
436             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
437             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
438             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
439             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
440
441             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
442             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
443             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
444             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
445             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
446             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
447             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
448             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
449             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
450             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
451
452             /* Update potential sum for this i atom from the interaction with this j atom. */
453             velecsum         = _fjsp_add_v2r8(velecsum,velec);
454
455             fscal            = felec;
456
457             /* Update vectorial force */
458             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
459             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
460             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
461             
462             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
463             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
464             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
465
466             /**************************
467              * CALCULATE INTERACTIONS *
468              **************************/
469
470             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
471
472             /* EWALD ELECTROSTATICS */
473
474             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
475             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
476             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
477             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
478             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
479
480             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
481             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
482             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
483             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
484             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
485             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
486             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
487             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
488             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
489             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
490
491             /* Update potential sum for this i atom from the interaction with this j atom. */
492             velecsum         = _fjsp_add_v2r8(velecsum,velec);
493
494             fscal            = felec;
495
496             /* Update vectorial force */
497             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
498             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
499             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
500             
501             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
502             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
503             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
504
505             /**************************
506              * CALCULATE INTERACTIONS *
507              **************************/
508
509             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
510
511             /* EWALD ELECTROSTATICS */
512
513             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
514             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
515             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
516             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
517             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
518
519             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
520             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
521             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
522             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
523             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
524             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
525             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
526             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
527             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
528             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
529
530             /* Update potential sum for this i atom from the interaction with this j atom. */
531             velecsum         = _fjsp_add_v2r8(velecsum,velec);
532
533             fscal            = felec;
534
535             /* Update vectorial force */
536             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
537             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
538             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
539             
540             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
541             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
542             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
543
544             /**************************
545              * CALCULATE INTERACTIONS *
546              **************************/
547
548             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
549
550             /* EWALD ELECTROSTATICS */
551
552             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
553             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
554             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
555             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
556             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
557
558             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
559             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
560             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
561             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
562             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
563             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
564             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
565             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
566             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
567             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
568
569             /* Update potential sum for this i atom from the interaction with this j atom. */
570             velecsum         = _fjsp_add_v2r8(velecsum,velec);
571
572             fscal            = felec;
573
574             /* Update vectorial force */
575             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
576             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
577             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
578             
579             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
580             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
581             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
582
583             /**************************
584              * CALCULATE INTERACTIONS *
585              **************************/
586
587             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
588
589             /* EWALD ELECTROSTATICS */
590
591             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
592             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
593             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
594             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
595             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
596
597             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
598             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
599             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
600             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
601             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
602             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
603             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
604             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
605             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
606             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
607
608             /* Update potential sum for this i atom from the interaction with this j atom. */
609             velecsum         = _fjsp_add_v2r8(velecsum,velec);
610
611             fscal            = felec;
612
613             /* Update vectorial force */
614             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
615             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
616             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
617             
618             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
619             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
620             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
621
622             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
623
624             /* Inner loop uses 396 flops */
625         }
626
627         if(jidx<j_index_end)
628         {
629
630             jnrA             = jjnr[jidx];
631             j_coord_offsetA  = DIM*jnrA;
632
633             /* load j atom coordinates */
634             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
635                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
636
637             /* Calculate displacement vector */
638             dx11             = _fjsp_sub_v2r8(ix1,jx1);
639             dy11             = _fjsp_sub_v2r8(iy1,jy1);
640             dz11             = _fjsp_sub_v2r8(iz1,jz1);
641             dx12             = _fjsp_sub_v2r8(ix1,jx2);
642             dy12             = _fjsp_sub_v2r8(iy1,jy2);
643             dz12             = _fjsp_sub_v2r8(iz1,jz2);
644             dx13             = _fjsp_sub_v2r8(ix1,jx3);
645             dy13             = _fjsp_sub_v2r8(iy1,jy3);
646             dz13             = _fjsp_sub_v2r8(iz1,jz3);
647             dx21             = _fjsp_sub_v2r8(ix2,jx1);
648             dy21             = _fjsp_sub_v2r8(iy2,jy1);
649             dz21             = _fjsp_sub_v2r8(iz2,jz1);
650             dx22             = _fjsp_sub_v2r8(ix2,jx2);
651             dy22             = _fjsp_sub_v2r8(iy2,jy2);
652             dz22             = _fjsp_sub_v2r8(iz2,jz2);
653             dx23             = _fjsp_sub_v2r8(ix2,jx3);
654             dy23             = _fjsp_sub_v2r8(iy2,jy3);
655             dz23             = _fjsp_sub_v2r8(iz2,jz3);
656             dx31             = _fjsp_sub_v2r8(ix3,jx1);
657             dy31             = _fjsp_sub_v2r8(iy3,jy1);
658             dz31             = _fjsp_sub_v2r8(iz3,jz1);
659             dx32             = _fjsp_sub_v2r8(ix3,jx2);
660             dy32             = _fjsp_sub_v2r8(iy3,jy2);
661             dz32             = _fjsp_sub_v2r8(iz3,jz2);
662             dx33             = _fjsp_sub_v2r8(ix3,jx3);
663             dy33             = _fjsp_sub_v2r8(iy3,jy3);
664             dz33             = _fjsp_sub_v2r8(iz3,jz3);
665
666             /* Calculate squared distance and things based on it */
667             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
668             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
669             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
670             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
671             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
672             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
673             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
674             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
675             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
676
677             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
678             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
679             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
680             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
681             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
682             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
683             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
684             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
685             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
686
687             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
688             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
689             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
690             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
691             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
692             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
693             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
694             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
695             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
696
697             fjx1             = _fjsp_setzero_v2r8();
698             fjy1             = _fjsp_setzero_v2r8();
699             fjz1             = _fjsp_setzero_v2r8();
700             fjx2             = _fjsp_setzero_v2r8();
701             fjy2             = _fjsp_setzero_v2r8();
702             fjz2             = _fjsp_setzero_v2r8();
703             fjx3             = _fjsp_setzero_v2r8();
704             fjy3             = _fjsp_setzero_v2r8();
705             fjz3             = _fjsp_setzero_v2r8();
706
707             /**************************
708              * CALCULATE INTERACTIONS *
709              **************************/
710
711             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
712
713             /* EWALD ELECTROSTATICS */
714
715             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
716             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
717             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
718             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
719             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
720
721             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
722             ewtabD           = _fjsp_setzero_v2r8();
723             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
724             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
725             ewtabFn          = _fjsp_setzero_v2r8();
726             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
727             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
728             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
729             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
730             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
731
732             /* Update potential sum for this i atom from the interaction with this j atom. */
733             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
734             velecsum         = _fjsp_add_v2r8(velecsum,velec);
735
736             fscal            = felec;
737
738             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
739
740             /* Update vectorial force */
741             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
742             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
743             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
744             
745             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
746             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
747             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
748
749             /**************************
750              * CALCULATE INTERACTIONS *
751              **************************/
752
753             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
754
755             /* EWALD ELECTROSTATICS */
756
757             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
758             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
759             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
760             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
761             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
762
763             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
764             ewtabD           = _fjsp_setzero_v2r8();
765             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
766             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
767             ewtabFn          = _fjsp_setzero_v2r8();
768             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
769             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
770             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
771             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
772             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
773
774             /* Update potential sum for this i atom from the interaction with this j atom. */
775             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
776             velecsum         = _fjsp_add_v2r8(velecsum,velec);
777
778             fscal            = felec;
779
780             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
781
782             /* Update vectorial force */
783             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
784             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
785             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
786             
787             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
788             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
789             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
790
791             /**************************
792              * CALCULATE INTERACTIONS *
793              **************************/
794
795             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
796
797             /* EWALD ELECTROSTATICS */
798
799             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
800             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
801             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
802             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
803             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
804
805             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
806             ewtabD           = _fjsp_setzero_v2r8();
807             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
808             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
809             ewtabFn          = _fjsp_setzero_v2r8();
810             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
811             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
812             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
813             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
814             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
815
816             /* Update potential sum for this i atom from the interaction with this j atom. */
817             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
818             velecsum         = _fjsp_add_v2r8(velecsum,velec);
819
820             fscal            = felec;
821
822             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
823
824             /* Update vectorial force */
825             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
826             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
827             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
828             
829             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
830             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
831             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
832
833             /**************************
834              * CALCULATE INTERACTIONS *
835              **************************/
836
837             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
838
839             /* EWALD ELECTROSTATICS */
840
841             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
842             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
843             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
844             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
845             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
846
847             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
848             ewtabD           = _fjsp_setzero_v2r8();
849             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
850             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
851             ewtabFn          = _fjsp_setzero_v2r8();
852             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
853             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
854             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
855             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
856             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
857
858             /* Update potential sum for this i atom from the interaction with this j atom. */
859             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
860             velecsum         = _fjsp_add_v2r8(velecsum,velec);
861
862             fscal            = felec;
863
864             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
865
866             /* Update vectorial force */
867             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
868             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
869             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
870             
871             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
872             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
873             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
874
875             /**************************
876              * CALCULATE INTERACTIONS *
877              **************************/
878
879             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
880
881             /* EWALD ELECTROSTATICS */
882
883             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
884             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
885             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
886             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
887             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
888
889             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
890             ewtabD           = _fjsp_setzero_v2r8();
891             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
892             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
893             ewtabFn          = _fjsp_setzero_v2r8();
894             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
895             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
896             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
897             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
898             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
899
900             /* Update potential sum for this i atom from the interaction with this j atom. */
901             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
902             velecsum         = _fjsp_add_v2r8(velecsum,velec);
903
904             fscal            = felec;
905
906             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
907
908             /* Update vectorial force */
909             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
910             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
911             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
912             
913             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
914             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
915             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
916
917             /**************************
918              * CALCULATE INTERACTIONS *
919              **************************/
920
921             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
922
923             /* EWALD ELECTROSTATICS */
924
925             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
926             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
927             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
928             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
929             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
930
931             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
932             ewtabD           = _fjsp_setzero_v2r8();
933             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
934             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
935             ewtabFn          = _fjsp_setzero_v2r8();
936             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
937             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
938             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
939             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
940             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
941
942             /* Update potential sum for this i atom from the interaction with this j atom. */
943             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
944             velecsum         = _fjsp_add_v2r8(velecsum,velec);
945
946             fscal            = felec;
947
948             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
949
950             /* Update vectorial force */
951             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
952             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
953             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
954             
955             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
956             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
957             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
958
959             /**************************
960              * CALCULATE INTERACTIONS *
961              **************************/
962
963             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
964
965             /* EWALD ELECTROSTATICS */
966
967             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
968             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
969             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
970             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
971             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
972
973             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
974             ewtabD           = _fjsp_setzero_v2r8();
975             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
976             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
977             ewtabFn          = _fjsp_setzero_v2r8();
978             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
979             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
980             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
981             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
982             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
983
984             /* Update potential sum for this i atom from the interaction with this j atom. */
985             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
986             velecsum         = _fjsp_add_v2r8(velecsum,velec);
987
988             fscal            = felec;
989
990             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
991
992             /* Update vectorial force */
993             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
994             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
995             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
996             
997             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
998             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
999             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1000
1001             /**************************
1002              * CALCULATE INTERACTIONS *
1003              **************************/
1004
1005             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1006
1007             /* EWALD ELECTROSTATICS */
1008
1009             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1010             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
1011             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1012             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1013             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1014
1015             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1016             ewtabD           = _fjsp_setzero_v2r8();
1017             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1018             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1019             ewtabFn          = _fjsp_setzero_v2r8();
1020             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1021             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1022             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1023             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
1024             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
1025
1026             /* Update potential sum for this i atom from the interaction with this j atom. */
1027             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1028             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1029
1030             fscal            = felec;
1031
1032             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1033
1034             /* Update vectorial force */
1035             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1036             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1037             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1038             
1039             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1040             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1041             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1042
1043             /**************************
1044              * CALCULATE INTERACTIONS *
1045              **************************/
1046
1047             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1048
1049             /* EWALD ELECTROSTATICS */
1050
1051             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1052             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
1053             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1054             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1055             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1056
1057             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1058             ewtabD           = _fjsp_setzero_v2r8();
1059             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1060             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1061             ewtabFn          = _fjsp_setzero_v2r8();
1062             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1063             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1064             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1065             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
1066             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
1067
1068             /* Update potential sum for this i atom from the interaction with this j atom. */
1069             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1070             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1071
1072             fscal            = felec;
1073
1074             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1075
1076             /* Update vectorial force */
1077             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1078             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1079             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1080             
1081             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1082             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1083             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1084
1085             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1086
1087             /* Inner loop uses 396 flops */
1088         }
1089
1090         /* End of innermost loop */
1091
1092         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1093                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
1094
1095         ggid                        = gid[iidx];
1096         /* Update potential energies */
1097         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1098
1099         /* Increment number of inner iterations */
1100         inneriter                  += j_index_end - j_index_start;
1101
1102         /* Outer loop uses 19 flops */
1103     }
1104
1105     /* Increment number of outer iterations */
1106     outeriter        += nri;
1107
1108     /* Update outer/inner flops */
1109
1110     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*396);
1111 }
1112 /*
1113  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
1114  * Electrostatics interaction: Ewald
1115  * VdW interaction:            None
1116  * Geometry:                   Water4-Water4
1117  * Calculate force/pot:        Force
1118  */
1119 void
1120 nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
1121                     (t_nblist                    * gmx_restrict       nlist,
1122                      rvec                        * gmx_restrict          xx,
1123                      rvec                        * gmx_restrict          ff,
1124                      t_forcerec                  * gmx_restrict          fr,
1125                      t_mdatoms                   * gmx_restrict     mdatoms,
1126                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1127                      t_nrnb                      * gmx_restrict        nrnb)
1128 {
1129     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1130      * just 0 for non-waters.
1131      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1132      * jnr indices corresponding to data put in the two positions in the SIMD register.
1133      */
1134     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1135     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1136     int              jnrA,jnrB;
1137     int              j_coord_offsetA,j_coord_offsetB;
1138     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1139     real             rcutoff_scalar;
1140     real             *shiftvec,*fshift,*x,*f;
1141     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1142     int              vdwioffset1;
1143     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1144     int              vdwioffset2;
1145     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1146     int              vdwioffset3;
1147     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1148     int              vdwjidx1A,vdwjidx1B;
1149     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1150     int              vdwjidx2A,vdwjidx2B;
1151     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1152     int              vdwjidx3A,vdwjidx3B;
1153     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1154     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1155     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1156     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1157     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1158     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1159     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1160     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1161     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1162     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1163     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1164     real             *charge;
1165     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1166     real             *ewtab;
1167     _fjsp_v2r8       itab_tmp;
1168     _fjsp_v2r8       dummy_mask,cutoff_mask;
1169     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1170     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1171     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1172
1173     x                = xx[0];
1174     f                = ff[0];
1175
1176     nri              = nlist->nri;
1177     iinr             = nlist->iinr;
1178     jindex           = nlist->jindex;
1179     jjnr             = nlist->jjnr;
1180     shiftidx         = nlist->shift;
1181     gid              = nlist->gid;
1182     shiftvec         = fr->shift_vec[0];
1183     fshift           = fr->fshift[0];
1184     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1185     charge           = mdatoms->chargeA;
1186
1187     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1188     ewtab            = fr->ic->tabq_coul_F;
1189     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1190     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1191
1192     /* Setup water-specific parameters */
1193     inr              = nlist->iinr[0];
1194     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1195     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1196     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1197
1198     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1199     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1200     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
1201     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1202     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1203     qq13             = _fjsp_mul_v2r8(iq1,jq3);
1204     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1205     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1206     qq23             = _fjsp_mul_v2r8(iq2,jq3);
1207     qq31             = _fjsp_mul_v2r8(iq3,jq1);
1208     qq32             = _fjsp_mul_v2r8(iq3,jq2);
1209     qq33             = _fjsp_mul_v2r8(iq3,jq3);
1210
1211     /* Avoid stupid compiler warnings */
1212     jnrA = jnrB = 0;
1213     j_coord_offsetA = 0;
1214     j_coord_offsetB = 0;
1215
1216     outeriter        = 0;
1217     inneriter        = 0;
1218
1219     /* Start outer loop over neighborlists */
1220     for(iidx=0; iidx<nri; iidx++)
1221     {
1222         /* Load shift vector for this list */
1223         i_shift_offset   = DIM*shiftidx[iidx];
1224
1225         /* Load limits for loop over neighbors */
1226         j_index_start    = jindex[iidx];
1227         j_index_end      = jindex[iidx+1];
1228
1229         /* Get outer coordinate index */
1230         inr              = iinr[iidx];
1231         i_coord_offset   = DIM*inr;
1232
1233         /* Load i particle coords and add shift vector */
1234         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
1235                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1236
1237         fix1             = _fjsp_setzero_v2r8();
1238         fiy1             = _fjsp_setzero_v2r8();
1239         fiz1             = _fjsp_setzero_v2r8();
1240         fix2             = _fjsp_setzero_v2r8();
1241         fiy2             = _fjsp_setzero_v2r8();
1242         fiz2             = _fjsp_setzero_v2r8();
1243         fix3             = _fjsp_setzero_v2r8();
1244         fiy3             = _fjsp_setzero_v2r8();
1245         fiz3             = _fjsp_setzero_v2r8();
1246
1247         /* Start inner kernel loop */
1248         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1249         {
1250
1251             /* Get j neighbor index, and coordinate index */
1252             jnrA             = jjnr[jidx];
1253             jnrB             = jjnr[jidx+1];
1254             j_coord_offsetA  = DIM*jnrA;
1255             j_coord_offsetB  = DIM*jnrB;
1256
1257             /* load j atom coordinates */
1258             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
1259                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1260
1261             /* Calculate displacement vector */
1262             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1263             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1264             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1265             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1266             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1267             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1268             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1269             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1270             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1271             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1272             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1273             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1274             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1275             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1276             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1277             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1278             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1279             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1280             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1281             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1282             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1283             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1284             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1285             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1286             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1287             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1288             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1289
1290             /* Calculate squared distance and things based on it */
1291             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1292             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1293             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1294             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1295             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1296             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1297             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1298             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1299             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1300
1301             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1302             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1303             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1304             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1305             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1306             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1307             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1308             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1309             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1310
1311             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1312             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1313             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1314             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1315             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1316             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1317             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1318             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1319             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1320
1321             fjx1             = _fjsp_setzero_v2r8();
1322             fjy1             = _fjsp_setzero_v2r8();
1323             fjz1             = _fjsp_setzero_v2r8();
1324             fjx2             = _fjsp_setzero_v2r8();
1325             fjy2             = _fjsp_setzero_v2r8();
1326             fjz2             = _fjsp_setzero_v2r8();
1327             fjx3             = _fjsp_setzero_v2r8();
1328             fjy3             = _fjsp_setzero_v2r8();
1329             fjz3             = _fjsp_setzero_v2r8();
1330
1331             /**************************
1332              * CALCULATE INTERACTIONS *
1333              **************************/
1334
1335             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1336
1337             /* EWALD ELECTROSTATICS */
1338
1339             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1340             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1341             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1342             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1343             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1344
1345             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1346                                          &ewtabF,&ewtabFn);
1347             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1348             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1349
1350             fscal            = felec;
1351
1352             /* Update vectorial force */
1353             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1354             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1355             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1356             
1357             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1358             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1359             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1360
1361             /**************************
1362              * CALCULATE INTERACTIONS *
1363              **************************/
1364
1365             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1366
1367             /* EWALD ELECTROSTATICS */
1368
1369             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1370             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1371             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1372             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1373             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1374
1375             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1376                                          &ewtabF,&ewtabFn);
1377             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1378             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1379
1380             fscal            = felec;
1381
1382             /* Update vectorial force */
1383             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1384             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1385             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1386             
1387             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1388             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1389             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1390
1391             /**************************
1392              * CALCULATE INTERACTIONS *
1393              **************************/
1394
1395             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
1396
1397             /* EWALD ELECTROSTATICS */
1398
1399             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1400             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
1401             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1402             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1403             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1404
1405             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1406                                          &ewtabF,&ewtabFn);
1407             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1408             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
1409
1410             fscal            = felec;
1411
1412             /* Update vectorial force */
1413             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1414             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1415             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1416             
1417             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1418             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1419             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1420
1421             /**************************
1422              * CALCULATE INTERACTIONS *
1423              **************************/
1424
1425             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1426
1427             /* EWALD ELECTROSTATICS */
1428
1429             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1430             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1431             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1432             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1433             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1434
1435             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1436                                          &ewtabF,&ewtabFn);
1437             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1438             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1439
1440             fscal            = felec;
1441
1442             /* Update vectorial force */
1443             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1444             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1445             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1446             
1447             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1448             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1449             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1450
1451             /**************************
1452              * CALCULATE INTERACTIONS *
1453              **************************/
1454
1455             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1456
1457             /* EWALD ELECTROSTATICS */
1458
1459             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1460             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1461             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1462             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1463             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1464
1465             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1466                                          &ewtabF,&ewtabFn);
1467             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1468             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1469
1470             fscal            = felec;
1471
1472             /* Update vectorial force */
1473             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1474             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1475             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1476             
1477             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1478             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1479             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1480
1481             /**************************
1482              * CALCULATE INTERACTIONS *
1483              **************************/
1484
1485             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1486
1487             /* EWALD ELECTROSTATICS */
1488
1489             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1490             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
1491             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1492             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1493             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1494
1495             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1496                                          &ewtabF,&ewtabFn);
1497             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1498             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
1499
1500             fscal            = felec;
1501
1502             /* Update vectorial force */
1503             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1504             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1505             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1506             
1507             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1508             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1509             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1510
1511             /**************************
1512              * CALCULATE INTERACTIONS *
1513              **************************/
1514
1515             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1516
1517             /* EWALD ELECTROSTATICS */
1518
1519             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1520             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
1521             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1522             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1523             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1524
1525             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1526                                          &ewtabF,&ewtabFn);
1527             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1528             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
1529
1530             fscal            = felec;
1531
1532             /* Update vectorial force */
1533             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1534             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1535             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1536             
1537             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1538             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1539             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1540
1541             /**************************
1542              * CALCULATE INTERACTIONS *
1543              **************************/
1544
1545             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1546
1547             /* EWALD ELECTROSTATICS */
1548
1549             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1550             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
1551             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1552             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1553             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1554
1555             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1556                                          &ewtabF,&ewtabFn);
1557             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1558             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
1559
1560             fscal            = felec;
1561
1562             /* Update vectorial force */
1563             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1564             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1565             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1566             
1567             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1568             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1569             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1570
1571             /**************************
1572              * CALCULATE INTERACTIONS *
1573              **************************/
1574
1575             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1576
1577             /* EWALD ELECTROSTATICS */
1578
1579             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1580             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
1581             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1582             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1583             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1584
1585             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1586                                          &ewtabF,&ewtabFn);
1587             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1588             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
1589
1590             fscal            = felec;
1591
1592             /* Update vectorial force */
1593             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1594             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1595             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1596             
1597             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1598             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1599             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1600
1601             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1602
1603             /* Inner loop uses 351 flops */
1604         }
1605
1606         if(jidx<j_index_end)
1607         {
1608
1609             jnrA             = jjnr[jidx];
1610             j_coord_offsetA  = DIM*jnrA;
1611
1612             /* load j atom coordinates */
1613             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
1614                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1615
1616             /* Calculate displacement vector */
1617             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1618             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1619             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1620             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1621             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1622             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1623             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1624             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1625             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1626             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1627             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1628             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1629             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1630             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1631             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1632             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1633             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1634             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1635             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1636             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1637             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1638             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1639             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1640             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1641             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1642             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1643             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1644
1645             /* Calculate squared distance and things based on it */
1646             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1647             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1648             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1649             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1650             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1651             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1652             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1653             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1654             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1655
1656             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1657             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1658             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1659             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1660             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1661             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1662             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1663             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1664             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1665
1666             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1667             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1668             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1669             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1670             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1671             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1672             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1673             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1674             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1675
1676             fjx1             = _fjsp_setzero_v2r8();
1677             fjy1             = _fjsp_setzero_v2r8();
1678             fjz1             = _fjsp_setzero_v2r8();
1679             fjx2             = _fjsp_setzero_v2r8();
1680             fjy2             = _fjsp_setzero_v2r8();
1681             fjz2             = _fjsp_setzero_v2r8();
1682             fjx3             = _fjsp_setzero_v2r8();
1683             fjy3             = _fjsp_setzero_v2r8();
1684             fjz3             = _fjsp_setzero_v2r8();
1685
1686             /**************************
1687              * CALCULATE INTERACTIONS *
1688              **************************/
1689
1690             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1691
1692             /* EWALD ELECTROSTATICS */
1693
1694             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1695             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1696             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1697             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1698             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1699
1700             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1701             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1702             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1703
1704             fscal            = felec;
1705
1706             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1707
1708             /* Update vectorial force */
1709             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1710             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1711             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1712             
1713             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1714             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1715             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1716
1717             /**************************
1718              * CALCULATE INTERACTIONS *
1719              **************************/
1720
1721             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1722
1723             /* EWALD ELECTROSTATICS */
1724
1725             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1726             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1727             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1728             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1729             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1730
1731             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1732             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1733             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1734
1735             fscal            = felec;
1736
1737             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1738
1739             /* Update vectorial force */
1740             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1741             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1742             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1743             
1744             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1745             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1746             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1747
1748             /**************************
1749              * CALCULATE INTERACTIONS *
1750              **************************/
1751
1752             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
1753
1754             /* EWALD ELECTROSTATICS */
1755
1756             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1757             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
1758             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1759             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1760             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1761
1762             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1763             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1764             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
1765
1766             fscal            = felec;
1767
1768             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1769
1770             /* Update vectorial force */
1771             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1772             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1773             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1774             
1775             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1776             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1777             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1778
1779             /**************************
1780              * CALCULATE INTERACTIONS *
1781              **************************/
1782
1783             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1784
1785             /* EWALD ELECTROSTATICS */
1786
1787             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1788             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1789             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1790             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1791             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1792
1793             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1794             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1795             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1796
1797             fscal            = felec;
1798
1799             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1800
1801             /* Update vectorial force */
1802             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1803             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1804             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1805             
1806             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1807             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1808             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1809
1810             /**************************
1811              * CALCULATE INTERACTIONS *
1812              **************************/
1813
1814             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1815
1816             /* EWALD ELECTROSTATICS */
1817
1818             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1819             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1820             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1821             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1822             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1823
1824             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1825             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1826             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1827
1828             fscal            = felec;
1829
1830             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1831
1832             /* Update vectorial force */
1833             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1834             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1835             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1836             
1837             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1838             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1839             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1840
1841             /**************************
1842              * CALCULATE INTERACTIONS *
1843              **************************/
1844
1845             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1846
1847             /* EWALD ELECTROSTATICS */
1848
1849             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1850             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
1851             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1852             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1853             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1854
1855             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1856             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1857             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
1858
1859             fscal            = felec;
1860
1861             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1862
1863             /* Update vectorial force */
1864             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1865             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1866             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1867             
1868             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1869             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1870             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1871
1872             /**************************
1873              * CALCULATE INTERACTIONS *
1874              **************************/
1875
1876             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1877
1878             /* EWALD ELECTROSTATICS */
1879
1880             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1881             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
1882             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1883             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1884             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1885
1886             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1887             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1888             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
1889
1890             fscal            = felec;
1891
1892             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1893
1894             /* Update vectorial force */
1895             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1896             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1897             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1898             
1899             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1900             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1901             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1902
1903             /**************************
1904              * CALCULATE INTERACTIONS *
1905              **************************/
1906
1907             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1908
1909             /* EWALD ELECTROSTATICS */
1910
1911             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1912             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
1913             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1914             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1915             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1916
1917             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1918             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1919             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
1920
1921             fscal            = felec;
1922
1923             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1924
1925             /* Update vectorial force */
1926             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1927             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1928             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1929             
1930             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1931             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1932             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1933
1934             /**************************
1935              * CALCULATE INTERACTIONS *
1936              **************************/
1937
1938             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1939
1940             /* EWALD ELECTROSTATICS */
1941
1942             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1943             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
1944             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1945             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1946             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1947
1948             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1949             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1950             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
1951
1952             fscal            = felec;
1953
1954             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1955
1956             /* Update vectorial force */
1957             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1958             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1959             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1960             
1961             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1962             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1963             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1964
1965             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1966
1967             /* Inner loop uses 351 flops */
1968         }
1969
1970         /* End of innermost loop */
1971
1972         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1973                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
1974
1975         /* Increment number of inner iterations */
1976         inneriter                  += j_index_end - j_index_start;
1977
1978         /* Outer loop uses 18 flops */
1979     }
1980
1981     /* Increment number of outer iterations */
1982     outeriter        += nri;
1983
1984     /* Update outer/inner flops */
1985
1986     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*351);
1987 }