Remove all unnecessary HAVE_CONFIG_H
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecEw_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: Ewald
52  * VdW interaction:            LennardJones
53  * Geometry:                   Water3-Water3
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset0;
80     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwjidx0A,vdwjidx0B;
86     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     int              nvdwtype;
103     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
104     int              *vdwtype;
105     real             *vdwparam;
106     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
107     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
108     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
109     real             *ewtab;
110     _fjsp_v2r8       itab_tmp;
111     _fjsp_v2r8       dummy_mask,cutoff_mask;
112     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
113     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
114     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
115
116     x                = xx[0];
117     f                = ff[0];
118
119     nri              = nlist->nri;
120     iinr             = nlist->iinr;
121     jindex           = nlist->jindex;
122     jjnr             = nlist->jjnr;
123     shiftidx         = nlist->shift;
124     gid              = nlist->gid;
125     shiftvec         = fr->shift_vec[0];
126     fshift           = fr->fshift[0];
127     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
128     charge           = mdatoms->chargeA;
129     nvdwtype         = fr->ntype;
130     vdwparam         = fr->nbfp;
131     vdwtype          = mdatoms->typeA;
132
133     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
134     ewtab            = fr->ic->tabq_coul_FDV0;
135     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
136     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
137
138     /* Setup water-specific parameters */
139     inr              = nlist->iinr[0];
140     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
141     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
142     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
143     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
144
145     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
146     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
147     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
148     vdwjidx0A        = 2*vdwtype[inr+0];
149     qq00             = _fjsp_mul_v2r8(iq0,jq0);
150     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
151     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
152     qq01             = _fjsp_mul_v2r8(iq0,jq1);
153     qq02             = _fjsp_mul_v2r8(iq0,jq2);
154     qq10             = _fjsp_mul_v2r8(iq1,jq0);
155     qq11             = _fjsp_mul_v2r8(iq1,jq1);
156     qq12             = _fjsp_mul_v2r8(iq1,jq2);
157     qq20             = _fjsp_mul_v2r8(iq2,jq0);
158     qq21             = _fjsp_mul_v2r8(iq2,jq1);
159     qq22             = _fjsp_mul_v2r8(iq2,jq2);
160
161     /* Avoid stupid compiler warnings */
162     jnrA = jnrB = 0;
163     j_coord_offsetA = 0;
164     j_coord_offsetB = 0;
165
166     outeriter        = 0;
167     inneriter        = 0;
168
169     /* Start outer loop over neighborlists */
170     for(iidx=0; iidx<nri; iidx++)
171     {
172         /* Load shift vector for this list */
173         i_shift_offset   = DIM*shiftidx[iidx];
174
175         /* Load limits for loop over neighbors */
176         j_index_start    = jindex[iidx];
177         j_index_end      = jindex[iidx+1];
178
179         /* Get outer coordinate index */
180         inr              = iinr[iidx];
181         i_coord_offset   = DIM*inr;
182
183         /* Load i particle coords and add shift vector */
184         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
185                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
186
187         fix0             = _fjsp_setzero_v2r8();
188         fiy0             = _fjsp_setzero_v2r8();
189         fiz0             = _fjsp_setzero_v2r8();
190         fix1             = _fjsp_setzero_v2r8();
191         fiy1             = _fjsp_setzero_v2r8();
192         fiz1             = _fjsp_setzero_v2r8();
193         fix2             = _fjsp_setzero_v2r8();
194         fiy2             = _fjsp_setzero_v2r8();
195         fiz2             = _fjsp_setzero_v2r8();
196
197         /* Reset potential sums */
198         velecsum         = _fjsp_setzero_v2r8();
199         vvdwsum          = _fjsp_setzero_v2r8();
200
201         /* Start inner kernel loop */
202         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
203         {
204
205             /* Get j neighbor index, and coordinate index */
206             jnrA             = jjnr[jidx];
207             jnrB             = jjnr[jidx+1];
208             j_coord_offsetA  = DIM*jnrA;
209             j_coord_offsetB  = DIM*jnrB;
210
211             /* load j atom coordinates */
212             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
213                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
214
215             /* Calculate displacement vector */
216             dx00             = _fjsp_sub_v2r8(ix0,jx0);
217             dy00             = _fjsp_sub_v2r8(iy0,jy0);
218             dz00             = _fjsp_sub_v2r8(iz0,jz0);
219             dx01             = _fjsp_sub_v2r8(ix0,jx1);
220             dy01             = _fjsp_sub_v2r8(iy0,jy1);
221             dz01             = _fjsp_sub_v2r8(iz0,jz1);
222             dx02             = _fjsp_sub_v2r8(ix0,jx2);
223             dy02             = _fjsp_sub_v2r8(iy0,jy2);
224             dz02             = _fjsp_sub_v2r8(iz0,jz2);
225             dx10             = _fjsp_sub_v2r8(ix1,jx0);
226             dy10             = _fjsp_sub_v2r8(iy1,jy0);
227             dz10             = _fjsp_sub_v2r8(iz1,jz0);
228             dx11             = _fjsp_sub_v2r8(ix1,jx1);
229             dy11             = _fjsp_sub_v2r8(iy1,jy1);
230             dz11             = _fjsp_sub_v2r8(iz1,jz1);
231             dx12             = _fjsp_sub_v2r8(ix1,jx2);
232             dy12             = _fjsp_sub_v2r8(iy1,jy2);
233             dz12             = _fjsp_sub_v2r8(iz1,jz2);
234             dx20             = _fjsp_sub_v2r8(ix2,jx0);
235             dy20             = _fjsp_sub_v2r8(iy2,jy0);
236             dz20             = _fjsp_sub_v2r8(iz2,jz0);
237             dx21             = _fjsp_sub_v2r8(ix2,jx1);
238             dy21             = _fjsp_sub_v2r8(iy2,jy1);
239             dz21             = _fjsp_sub_v2r8(iz2,jz1);
240             dx22             = _fjsp_sub_v2r8(ix2,jx2);
241             dy22             = _fjsp_sub_v2r8(iy2,jy2);
242             dz22             = _fjsp_sub_v2r8(iz2,jz2);
243
244             /* Calculate squared distance and things based on it */
245             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
246             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
247             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
248             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
249             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
250             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
251             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
252             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
253             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
254
255             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
256             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
257             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
258             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
259             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
260             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
261             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
262             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
263             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
264
265             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
266             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
267             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
268             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
269             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
270             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
271             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
272             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
273             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
274
275             fjx0             = _fjsp_setzero_v2r8();
276             fjy0             = _fjsp_setzero_v2r8();
277             fjz0             = _fjsp_setzero_v2r8();
278             fjx1             = _fjsp_setzero_v2r8();
279             fjy1             = _fjsp_setzero_v2r8();
280             fjz1             = _fjsp_setzero_v2r8();
281             fjx2             = _fjsp_setzero_v2r8();
282             fjy2             = _fjsp_setzero_v2r8();
283             fjz2             = _fjsp_setzero_v2r8();
284
285             /**************************
286              * CALCULATE INTERACTIONS *
287              **************************/
288
289             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
290
291             /* EWALD ELECTROSTATICS */
292
293             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
294             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
295             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
296             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
297             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
298
299             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
300             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
301             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
302             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
303             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
304             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
305             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
306             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
307             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
308             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
309
310             /* LENNARD-JONES DISPERSION/REPULSION */
311
312             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
313             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
314             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
315             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
316             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
317
318             /* Update potential sum for this i atom from the interaction with this j atom. */
319             velecsum         = _fjsp_add_v2r8(velecsum,velec);
320             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
321
322             fscal            = _fjsp_add_v2r8(felec,fvdw);
323
324             /* Update vectorial force */
325             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
326             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
327             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
328             
329             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
330             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
331             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
332
333             /**************************
334              * CALCULATE INTERACTIONS *
335              **************************/
336
337             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
338
339             /* EWALD ELECTROSTATICS */
340
341             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
342             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
343             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
344             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
345             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
346
347             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
348             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
349             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
350             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
351             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
352             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
353             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
354             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
355             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
356             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
357
358             /* Update potential sum for this i atom from the interaction with this j atom. */
359             velecsum         = _fjsp_add_v2r8(velecsum,velec);
360
361             fscal            = felec;
362
363             /* Update vectorial force */
364             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
365             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
366             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
367             
368             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
369             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
370             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
371
372             /**************************
373              * CALCULATE INTERACTIONS *
374              **************************/
375
376             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
377
378             /* EWALD ELECTROSTATICS */
379
380             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
381             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
382             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
383             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
384             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
385
386             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
387             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
388             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
389             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
390             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
391             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
392             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
393             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
394             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
395             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
396
397             /* Update potential sum for this i atom from the interaction with this j atom. */
398             velecsum         = _fjsp_add_v2r8(velecsum,velec);
399
400             fscal            = felec;
401
402             /* Update vectorial force */
403             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
404             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
405             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
406             
407             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
408             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
409             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
410
411             /**************************
412              * CALCULATE INTERACTIONS *
413              **************************/
414
415             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
416
417             /* EWALD ELECTROSTATICS */
418
419             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
420             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
421             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
422             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
423             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
424
425             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
426             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
427             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
428             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
429             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
430             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
431             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
432             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
433             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
434             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
435
436             /* Update potential sum for this i atom from the interaction with this j atom. */
437             velecsum         = _fjsp_add_v2r8(velecsum,velec);
438
439             fscal            = felec;
440
441             /* Update vectorial force */
442             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
443             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
444             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
445             
446             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
447             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
448             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
449
450             /**************************
451              * CALCULATE INTERACTIONS *
452              **************************/
453
454             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
455
456             /* EWALD ELECTROSTATICS */
457
458             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
459             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
460             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
461             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
462             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
463
464             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
465             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
466             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
467             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
468             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
469             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
470             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
471             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
472             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
473             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
474
475             /* Update potential sum for this i atom from the interaction with this j atom. */
476             velecsum         = _fjsp_add_v2r8(velecsum,velec);
477
478             fscal            = felec;
479
480             /* Update vectorial force */
481             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
482             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
483             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
484             
485             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
486             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
487             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
488
489             /**************************
490              * CALCULATE INTERACTIONS *
491              **************************/
492
493             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
494
495             /* EWALD ELECTROSTATICS */
496
497             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
498             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
499             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
500             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
501             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
502
503             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
504             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
505             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
506             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
507             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
508             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
509             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
510             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
511             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
512             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
513
514             /* Update potential sum for this i atom from the interaction with this j atom. */
515             velecsum         = _fjsp_add_v2r8(velecsum,velec);
516
517             fscal            = felec;
518
519             /* Update vectorial force */
520             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
521             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
522             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
523             
524             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
525             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
526             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
527
528             /**************************
529              * CALCULATE INTERACTIONS *
530              **************************/
531
532             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
533
534             /* EWALD ELECTROSTATICS */
535
536             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
537             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
538             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
539             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
540             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
541
542             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
543             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
544             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
545             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
546             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
547             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
548             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
549             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
550             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
551             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
552
553             /* Update potential sum for this i atom from the interaction with this j atom. */
554             velecsum         = _fjsp_add_v2r8(velecsum,velec);
555
556             fscal            = felec;
557
558             /* Update vectorial force */
559             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
560             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
561             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
562             
563             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
564             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
565             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
566
567             /**************************
568              * CALCULATE INTERACTIONS *
569              **************************/
570
571             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
572
573             /* EWALD ELECTROSTATICS */
574
575             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
576             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
577             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
578             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
579             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
580
581             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
582             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
583             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
584             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
585             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
586             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
587             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
588             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
589             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
590             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
591
592             /* Update potential sum for this i atom from the interaction with this j atom. */
593             velecsum         = _fjsp_add_v2r8(velecsum,velec);
594
595             fscal            = felec;
596
597             /* Update vectorial force */
598             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
599             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
600             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
601             
602             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
603             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
604             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
605
606             /**************************
607              * CALCULATE INTERACTIONS *
608              **************************/
609
610             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
611
612             /* EWALD ELECTROSTATICS */
613
614             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
615             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
616             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
617             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
618             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
619
620             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
621             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
622             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
623             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
624             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
625             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
626             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
627             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
628             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
629             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
630
631             /* Update potential sum for this i atom from the interaction with this j atom. */
632             velecsum         = _fjsp_add_v2r8(velecsum,velec);
633
634             fscal            = felec;
635
636             /* Update vectorial force */
637             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
638             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
639             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
640             
641             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
642             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
643             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
644
645             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
646
647             /* Inner loop uses 408 flops */
648         }
649
650         if(jidx<j_index_end)
651         {
652
653             jnrA             = jjnr[jidx];
654             j_coord_offsetA  = DIM*jnrA;
655
656             /* load j atom coordinates */
657             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
658                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
659
660             /* Calculate displacement vector */
661             dx00             = _fjsp_sub_v2r8(ix0,jx0);
662             dy00             = _fjsp_sub_v2r8(iy0,jy0);
663             dz00             = _fjsp_sub_v2r8(iz0,jz0);
664             dx01             = _fjsp_sub_v2r8(ix0,jx1);
665             dy01             = _fjsp_sub_v2r8(iy0,jy1);
666             dz01             = _fjsp_sub_v2r8(iz0,jz1);
667             dx02             = _fjsp_sub_v2r8(ix0,jx2);
668             dy02             = _fjsp_sub_v2r8(iy0,jy2);
669             dz02             = _fjsp_sub_v2r8(iz0,jz2);
670             dx10             = _fjsp_sub_v2r8(ix1,jx0);
671             dy10             = _fjsp_sub_v2r8(iy1,jy0);
672             dz10             = _fjsp_sub_v2r8(iz1,jz0);
673             dx11             = _fjsp_sub_v2r8(ix1,jx1);
674             dy11             = _fjsp_sub_v2r8(iy1,jy1);
675             dz11             = _fjsp_sub_v2r8(iz1,jz1);
676             dx12             = _fjsp_sub_v2r8(ix1,jx2);
677             dy12             = _fjsp_sub_v2r8(iy1,jy2);
678             dz12             = _fjsp_sub_v2r8(iz1,jz2);
679             dx20             = _fjsp_sub_v2r8(ix2,jx0);
680             dy20             = _fjsp_sub_v2r8(iy2,jy0);
681             dz20             = _fjsp_sub_v2r8(iz2,jz0);
682             dx21             = _fjsp_sub_v2r8(ix2,jx1);
683             dy21             = _fjsp_sub_v2r8(iy2,jy1);
684             dz21             = _fjsp_sub_v2r8(iz2,jz1);
685             dx22             = _fjsp_sub_v2r8(ix2,jx2);
686             dy22             = _fjsp_sub_v2r8(iy2,jy2);
687             dz22             = _fjsp_sub_v2r8(iz2,jz2);
688
689             /* Calculate squared distance and things based on it */
690             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
691             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
692             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
693             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
694             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
695             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
696             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
697             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
698             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
699
700             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
701             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
702             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
703             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
704             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
705             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
706             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
707             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
708             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
709
710             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
711             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
712             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
713             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
714             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
715             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
716             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
717             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
718             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
719
720             fjx0             = _fjsp_setzero_v2r8();
721             fjy0             = _fjsp_setzero_v2r8();
722             fjz0             = _fjsp_setzero_v2r8();
723             fjx1             = _fjsp_setzero_v2r8();
724             fjy1             = _fjsp_setzero_v2r8();
725             fjz1             = _fjsp_setzero_v2r8();
726             fjx2             = _fjsp_setzero_v2r8();
727             fjy2             = _fjsp_setzero_v2r8();
728             fjz2             = _fjsp_setzero_v2r8();
729
730             /**************************
731              * CALCULATE INTERACTIONS *
732              **************************/
733
734             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
735
736             /* EWALD ELECTROSTATICS */
737
738             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
739             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
740             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
741             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
742             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
743
744             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
745             ewtabD           = _fjsp_setzero_v2r8();
746             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
747             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
748             ewtabFn          = _fjsp_setzero_v2r8();
749             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
750             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
751             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
752             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
753             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
754
755             /* LENNARD-JONES DISPERSION/REPULSION */
756
757             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
758             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
759             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
760             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
761             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
762
763             /* Update potential sum for this i atom from the interaction with this j atom. */
764             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
765             velecsum         = _fjsp_add_v2r8(velecsum,velec);
766             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
767             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
768
769             fscal            = _fjsp_add_v2r8(felec,fvdw);
770
771             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
772
773             /* Update vectorial force */
774             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
775             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
776             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
777             
778             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
779             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
780             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
781
782             /**************************
783              * CALCULATE INTERACTIONS *
784              **************************/
785
786             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
787
788             /* EWALD ELECTROSTATICS */
789
790             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
791             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
792             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
793             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
794             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
795
796             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
797             ewtabD           = _fjsp_setzero_v2r8();
798             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
799             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
800             ewtabFn          = _fjsp_setzero_v2r8();
801             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
802             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
803             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
804             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
805             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
806
807             /* Update potential sum for this i atom from the interaction with this j atom. */
808             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
809             velecsum         = _fjsp_add_v2r8(velecsum,velec);
810
811             fscal            = felec;
812
813             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
814
815             /* Update vectorial force */
816             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
817             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
818             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
819             
820             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
821             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
822             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
823
824             /**************************
825              * CALCULATE INTERACTIONS *
826              **************************/
827
828             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
829
830             /* EWALD ELECTROSTATICS */
831
832             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
833             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
834             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
835             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
836             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
837
838             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
839             ewtabD           = _fjsp_setzero_v2r8();
840             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
841             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
842             ewtabFn          = _fjsp_setzero_v2r8();
843             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
844             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
845             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
846             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
847             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
848
849             /* Update potential sum for this i atom from the interaction with this j atom. */
850             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
851             velecsum         = _fjsp_add_v2r8(velecsum,velec);
852
853             fscal            = felec;
854
855             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
856
857             /* Update vectorial force */
858             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
859             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
860             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
861             
862             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
863             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
864             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
865
866             /**************************
867              * CALCULATE INTERACTIONS *
868              **************************/
869
870             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
871
872             /* EWALD ELECTROSTATICS */
873
874             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
875             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
876             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
877             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
878             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
879
880             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
881             ewtabD           = _fjsp_setzero_v2r8();
882             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
883             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
884             ewtabFn          = _fjsp_setzero_v2r8();
885             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
886             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
887             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
888             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
889             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
890
891             /* Update potential sum for this i atom from the interaction with this j atom. */
892             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
893             velecsum         = _fjsp_add_v2r8(velecsum,velec);
894
895             fscal            = felec;
896
897             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
898
899             /* Update vectorial force */
900             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
901             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
902             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
903             
904             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
905             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
906             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
907
908             /**************************
909              * CALCULATE INTERACTIONS *
910              **************************/
911
912             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
913
914             /* EWALD ELECTROSTATICS */
915
916             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
917             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
918             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
919             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
920             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
921
922             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
923             ewtabD           = _fjsp_setzero_v2r8();
924             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
925             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
926             ewtabFn          = _fjsp_setzero_v2r8();
927             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
928             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
929             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
930             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
931             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
932
933             /* Update potential sum for this i atom from the interaction with this j atom. */
934             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
935             velecsum         = _fjsp_add_v2r8(velecsum,velec);
936
937             fscal            = felec;
938
939             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
940
941             /* Update vectorial force */
942             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
943             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
944             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
945             
946             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
947             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
948             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
949
950             /**************************
951              * CALCULATE INTERACTIONS *
952              **************************/
953
954             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
955
956             /* EWALD ELECTROSTATICS */
957
958             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
959             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
960             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
961             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
962             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
963
964             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
965             ewtabD           = _fjsp_setzero_v2r8();
966             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
967             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
968             ewtabFn          = _fjsp_setzero_v2r8();
969             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
970             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
971             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
972             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
973             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
974
975             /* Update potential sum for this i atom from the interaction with this j atom. */
976             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
977             velecsum         = _fjsp_add_v2r8(velecsum,velec);
978
979             fscal            = felec;
980
981             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
982
983             /* Update vectorial force */
984             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
985             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
986             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
987             
988             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
989             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
990             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
991
992             /**************************
993              * CALCULATE INTERACTIONS *
994              **************************/
995
996             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
997
998             /* EWALD ELECTROSTATICS */
999
1000             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1001             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
1002             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1003             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1004             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1005
1006             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1007             ewtabD           = _fjsp_setzero_v2r8();
1008             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1009             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1010             ewtabFn          = _fjsp_setzero_v2r8();
1011             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1012             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1013             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1014             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
1015             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1016
1017             /* Update potential sum for this i atom from the interaction with this j atom. */
1018             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1019             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1020
1021             fscal            = felec;
1022
1023             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1024
1025             /* Update vectorial force */
1026             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1027             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1028             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1029             
1030             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1031             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1032             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1033
1034             /**************************
1035              * CALCULATE INTERACTIONS *
1036              **************************/
1037
1038             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1039
1040             /* EWALD ELECTROSTATICS */
1041
1042             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1043             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1044             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1045             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1046             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1047
1048             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1049             ewtabD           = _fjsp_setzero_v2r8();
1050             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1051             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1052             ewtabFn          = _fjsp_setzero_v2r8();
1053             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1054             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1055             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1056             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
1057             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1058
1059             /* Update potential sum for this i atom from the interaction with this j atom. */
1060             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1061             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1062
1063             fscal            = felec;
1064
1065             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1066
1067             /* Update vectorial force */
1068             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1069             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1070             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1071             
1072             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1073             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1074             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1075
1076             /**************************
1077              * CALCULATE INTERACTIONS *
1078              **************************/
1079
1080             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1081
1082             /* EWALD ELECTROSTATICS */
1083
1084             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1085             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1086             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1087             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1088             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1089
1090             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1091             ewtabD           = _fjsp_setzero_v2r8();
1092             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1093             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1094             ewtabFn          = _fjsp_setzero_v2r8();
1095             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1096             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1097             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1098             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
1099             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1100
1101             /* Update potential sum for this i atom from the interaction with this j atom. */
1102             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1103             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1104
1105             fscal            = felec;
1106
1107             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1108
1109             /* Update vectorial force */
1110             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1111             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1112             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1113             
1114             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1115             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1116             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1117
1118             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1119
1120             /* Inner loop uses 408 flops */
1121         }
1122
1123         /* End of innermost loop */
1124
1125         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1126                                               f+i_coord_offset,fshift+i_shift_offset);
1127
1128         ggid                        = gid[iidx];
1129         /* Update potential energies */
1130         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1131         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1132
1133         /* Increment number of inner iterations */
1134         inneriter                  += j_index_end - j_index_start;
1135
1136         /* Outer loop uses 20 flops */
1137     }
1138
1139     /* Increment number of outer iterations */
1140     outeriter        += nri;
1141
1142     /* Update outer/inner flops */
1143
1144     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*408);
1145 }
1146 /*
1147  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
1148  * Electrostatics interaction: Ewald
1149  * VdW interaction:            LennardJones
1150  * Geometry:                   Water3-Water3
1151  * Calculate force/pot:        Force
1152  */
1153 void
1154 nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
1155                     (t_nblist                    * gmx_restrict       nlist,
1156                      rvec                        * gmx_restrict          xx,
1157                      rvec                        * gmx_restrict          ff,
1158                      t_forcerec                  * gmx_restrict          fr,
1159                      t_mdatoms                   * gmx_restrict     mdatoms,
1160                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1161                      t_nrnb                      * gmx_restrict        nrnb)
1162 {
1163     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1164      * just 0 for non-waters.
1165      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. the two different
1166      * jnr indices whose data are placed in the two positions of the SIMD register.
1167      */
1168     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1169     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1170     int              jnrA,jnrB;
1171     int              j_coord_offsetA,j_coord_offsetB;
1172     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1173     real             rcutoff_scalar;
1174     real             *shiftvec,*fshift,*x,*f;
1175     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1176     int              vdwioffset0;
1177     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1178     int              vdwioffset1;
1179     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1180     int              vdwioffset2;
1181     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1182     int              vdwjidx0A,vdwjidx0B;
1183     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1184     int              vdwjidx1A,vdwjidx1B;
1185     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1186     int              vdwjidx2A,vdwjidx2B;
1187     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1188     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1189     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1190     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1191     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1192     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1193     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1194     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1195     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1196     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1197     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1198     real             *charge;
1199     int              nvdwtype;
1200     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1201     int              *vdwtype;
1202     real             *vdwparam;
1203     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1204     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1205     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1206     real             *ewtab;
1207     _fjsp_v2r8       itab_tmp;
1208     _fjsp_v2r8       dummy_mask,cutoff_mask;
1209     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1210     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1211     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1212
1213     x                = xx[0];
1214     f                = ff[0];
1215
1216     nri              = nlist->nri;
1217     iinr             = nlist->iinr;
1218     jindex           = nlist->jindex;
1219     jjnr             = nlist->jjnr;
1220     shiftidx         = nlist->shift;
1221     gid              = nlist->gid;
1222     shiftvec         = fr->shift_vec[0];
1223     fshift           = fr->fshift[0];
1224     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1225     charge           = mdatoms->chargeA;
1226     nvdwtype         = fr->ntype;
1227     vdwparam         = fr->nbfp;
1228     vdwtype          = mdatoms->typeA;
1229
1230     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1231     ewtab            = fr->ic->tabq_coul_F;
1232     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1233     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1234
1235     /* Setup water-specific parameters */
1236     inr              = nlist->iinr[0];
1237     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1238     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1239     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1240     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1241
1242     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1243     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1244     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1245     vdwjidx0A        = 2*vdwtype[inr+0];
1246     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1247     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1248     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1249     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1250     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1251     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1252     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1253     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1254     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1255     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1256     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1257
1258     /* Avoid stupid compiler warnings */
1259     jnrA = jnrB = 0;
1260     j_coord_offsetA = 0;
1261     j_coord_offsetB = 0;
1262
1263     outeriter        = 0;
1264     inneriter        = 0;
1265
1266     /* Start outer loop over neighborlists */
1267     for(iidx=0; iidx<nri; iidx++)
1268     {
1269         /* Load shift vector for this list */
1270         i_shift_offset   = DIM*shiftidx[iidx];
1271
1272         /* Load limits for loop over neighbors */
1273         j_index_start    = jindex[iidx];
1274         j_index_end      = jindex[iidx+1];
1275
1276         /* Get outer coordinate index */
1277         inr              = iinr[iidx];
1278         i_coord_offset   = DIM*inr;
1279
1280         /* Load i particle coords and add shift vector */
1281         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1282                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1283
1284         fix0             = _fjsp_setzero_v2r8();
1285         fiy0             = _fjsp_setzero_v2r8();
1286         fiz0             = _fjsp_setzero_v2r8();
1287         fix1             = _fjsp_setzero_v2r8();
1288         fiy1             = _fjsp_setzero_v2r8();
1289         fiz1             = _fjsp_setzero_v2r8();
1290         fix2             = _fjsp_setzero_v2r8();
1291         fiy2             = _fjsp_setzero_v2r8();
1292         fiz2             = _fjsp_setzero_v2r8();
1293
1294         /* Start inner kernel loop */
1295         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1296         {
1297
1298             /* Get j neighbor index, and coordinate index */
1299             jnrA             = jjnr[jidx];
1300             jnrB             = jjnr[jidx+1];
1301             j_coord_offsetA  = DIM*jnrA;
1302             j_coord_offsetB  = DIM*jnrB;
1303
1304             /* load j atom coordinates */
1305             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1306                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1307
1308             /* Calculate displacement vector */
1309             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1310             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1311             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1312             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1313             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1314             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1315             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1316             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1317             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1318             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1319             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1320             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1321             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1322             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1323             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1324             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1325             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1326             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1327             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1328             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1329             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1330             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1331             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1332             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1333             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1334             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1335             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1336
1337             /* Calculate squared distance and things based on it */
1338             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1339             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1340             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1341             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1342             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1343             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1344             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1345             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1346             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1347
1348             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1349             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1350             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1351             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1352             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1353             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1354             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1355             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1356             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1357
1358             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1359             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1360             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1361             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1362             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1363             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1364             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1365             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1366             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1367
1368             fjx0             = _fjsp_setzero_v2r8();
1369             fjy0             = _fjsp_setzero_v2r8();
1370             fjz0             = _fjsp_setzero_v2r8();
1371             fjx1             = _fjsp_setzero_v2r8();
1372             fjy1             = _fjsp_setzero_v2r8();
1373             fjz1             = _fjsp_setzero_v2r8();
1374             fjx2             = _fjsp_setzero_v2r8();
1375             fjy2             = _fjsp_setzero_v2r8();
1376             fjz2             = _fjsp_setzero_v2r8();
1377
1378             /**************************
1379              * CALCULATE INTERACTIONS *
1380              **************************/
1381
1382             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1383
1384             /* EWALD ELECTROSTATICS */
1385
1386             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1387             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
1388             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1389             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1390             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1391
1392             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1393                                          &ewtabF,&ewtabFn);
1394             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1395             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1396
1397             /* LENNARD-JONES DISPERSION/REPULSION */
1398
1399             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1400             fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1401
1402             fscal            = _fjsp_add_v2r8(felec,fvdw);
1403
1404             /* Update vectorial force */
1405             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1406             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1407             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1408             
1409             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1410             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1411             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1412
1413             /**************************
1414              * CALCULATE INTERACTIONS *
1415              **************************/
1416
1417             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1418
1419             /* EWALD ELECTROSTATICS */
1420
1421             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1422             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
1423             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1424             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1425             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1426
1427             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1428                                          &ewtabF,&ewtabFn);
1429             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1430             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1431
1432             fscal            = felec;
1433
1434             /* Update vectorial force */
1435             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1436             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1437             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1438             
1439             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1440             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1441             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1442
1443             /**************************
1444              * CALCULATE INTERACTIONS *
1445              **************************/
1446
1447             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1448
1449             /* EWALD ELECTROSTATICS */
1450
1451             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1452             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
1453             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1454             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1455             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1456
1457             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1458                                          &ewtabF,&ewtabFn);
1459             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1460             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1461
1462             fscal            = felec;
1463
1464             /* Update vectorial force */
1465             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1466             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1467             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1468             
1469             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1470             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1471             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1472
1473             /**************************
1474              * CALCULATE INTERACTIONS *
1475              **************************/
1476
1477             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1478
1479             /* EWALD ELECTROSTATICS */
1480
1481             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1482             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
1483             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1484             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1485             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1486
1487             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1488                                          &ewtabF,&ewtabFn);
1489             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1490             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1491
1492             fscal            = felec;
1493
1494             /* Update vectorial force */
1495             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1496             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1497             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1498             
1499             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1500             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1501             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1502
1503             /**************************
1504              * CALCULATE INTERACTIONS *
1505              **************************/
1506
1507             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1508
1509             /* EWALD ELECTROSTATICS */
1510
1511             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1512             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1513             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1514             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1515             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1516
1517             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1518                                          &ewtabF,&ewtabFn);
1519             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1520             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1521
1522             fscal            = felec;
1523
1524             /* Update vectorial force */
1525             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1526             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1527             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1528             
1529             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1530             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1531             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1532
1533             /**************************
1534              * CALCULATE INTERACTIONS *
1535              **************************/
1536
1537             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1538
1539             /* EWALD ELECTROSTATICS */
1540
1541             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1542             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1543             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1544             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1545             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1546
1547             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1548                                          &ewtabF,&ewtabFn);
1549             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1550             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1551
1552             fscal            = felec;
1553
1554             /* Update vectorial force */
1555             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1556             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1557             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1558             
1559             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1560             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1561             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1562
1563             /**************************
1564              * CALCULATE INTERACTIONS *
1565              **************************/
1566
1567             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1568
1569             /* EWALD ELECTROSTATICS */
1570
1571             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1572             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
1573             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1574             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1575             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1576
1577             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1578                                          &ewtabF,&ewtabFn);
1579             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1580             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1581
1582             fscal            = felec;
1583
1584             /* Update vectorial force */
1585             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1586             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1587             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1588             
1589             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1590             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1591             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1592
1593             /**************************
1594              * CALCULATE INTERACTIONS *
1595              **************************/
1596
1597             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1598
1599             /* EWALD ELECTROSTATICS */
1600
1601             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1602             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1603             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1604             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1605             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1606
1607             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1608                                          &ewtabF,&ewtabFn);
1609             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1610             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1611
1612             fscal            = felec;
1613
1614             /* Update vectorial force */
1615             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1616             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1617             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1618             
1619             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1620             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1621             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1622
1623             /**************************
1624              * CALCULATE INTERACTIONS *
1625              **************************/
1626
1627             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1628
1629             /* EWALD ELECTROSTATICS */
1630
1631             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1632             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1633             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1634             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1635             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1636
1637             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1638                                          &ewtabF,&ewtabFn);
1639             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1640             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1641
1642             fscal            = felec;
1643
1644             /* Update vectorial force */
1645             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1646             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1647             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1648             
1649             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1650             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1651             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1652
1653             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1654
1655             /* Inner loop uses 358 flops */
1656         }
1657
1658         if(jidx<j_index_end)
1659         {
1660
1661             jnrA             = jjnr[jidx];
1662             j_coord_offsetA  = DIM*jnrA;
1663
1664             /* load j atom coordinates */
1665             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1666                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1667
1668             /* Calculate displacement vector */
1669             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1670             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1671             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1672             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1673             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1674             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1675             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1676             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1677             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1678             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1679             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1680             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1681             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1682             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1683             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1684             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1685             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1686             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1687             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1688             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1689             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1690             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1691             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1692             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1693             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1694             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1695             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1696
1697             /* Calculate squared distance and things based on it */
1698             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1699             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1700             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1701             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1702             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1703             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1704             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1705             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1706             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1707
1708             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1709             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1710             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1711             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1712             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1713             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1714             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1715             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1716             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1717
1718             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1719             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1720             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1721             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1722             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1723             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1724             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1725             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1726             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1727
1728             fjx0             = _fjsp_setzero_v2r8();
1729             fjy0             = _fjsp_setzero_v2r8();
1730             fjz0             = _fjsp_setzero_v2r8();
1731             fjx1             = _fjsp_setzero_v2r8();
1732             fjy1             = _fjsp_setzero_v2r8();
1733             fjz1             = _fjsp_setzero_v2r8();
1734             fjx2             = _fjsp_setzero_v2r8();
1735             fjy2             = _fjsp_setzero_v2r8();
1736             fjz2             = _fjsp_setzero_v2r8();
1737
1738             /**************************
1739              * CALCULATE INTERACTIONS *
1740              **************************/
1741
1742             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1743
1744             /* EWALD ELECTROSTATICS */
1745
1746             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1747             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
1748             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1749             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1750             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1751
1752             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1753             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1754             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1755
1756             /* LENNARD-JONES DISPERSION/REPULSION */
1757
1758             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1759             fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1760
1761             fscal            = _fjsp_add_v2r8(felec,fvdw);
1762
1763             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1764
1765             /* Update vectorial force */
1766             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1767             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1768             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1769             
1770             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1771             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1772             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1773
1774             /**************************
1775              * CALCULATE INTERACTIONS *
1776              **************************/
1777
1778             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1779
1780             /* EWALD ELECTROSTATICS */
1781
1782             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1783             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
1784             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1785             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1786             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1787
1788             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1789             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1790             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1791
1792             fscal            = felec;
1793
1794             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1795
1796             /* Update vectorial force */
1797             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1798             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1799             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1800             
1801             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1802             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1803             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1804
1805             /**************************
1806              * CALCULATE INTERACTIONS *
1807              **************************/
1808
1809             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1810
1811             /* EWALD ELECTROSTATICS */
1812
1813             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1814             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
1815             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1816             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1817             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1818
1819             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1820             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1821             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1822
1823             fscal            = felec;
1824
1825             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1826
1827             /* Update vectorial force */
1828             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1829             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1830             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1831             
1832             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1833             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1834             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1835
1836             /**************************
1837              * CALCULATE INTERACTIONS *
1838              **************************/
1839
1840             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1841
1842             /* EWALD ELECTROSTATICS */
1843
1844             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1845             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
1846             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1847             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1848             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1849
1850             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1851             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1852             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1853
1854             fscal            = felec;
1855
1856             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1857
1858             /* Update vectorial force */
1859             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1860             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1861             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1862             
1863             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1864             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1865             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1866
1867             /**************************
1868              * CALCULATE INTERACTIONS *
1869              **************************/
1870
1871             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1872
1873             /* EWALD ELECTROSTATICS */
1874
1875             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1876             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1877             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1878             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1879             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1880
1881             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1882             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1883             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1884
1885             fscal            = felec;
1886
1887             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1888
1889             /* Update vectorial force */
1890             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1891             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1892             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1893             
1894             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1895             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1896             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1897
1898             /**************************
1899              * CALCULATE INTERACTIONS *
1900              **************************/
1901
1902             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1903
1904             /* EWALD ELECTROSTATICS */
1905
1906             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1907             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1908             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1909             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1910             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1911
1912             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1913             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1914             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1915
1916             fscal            = felec;
1917
1918             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1919
1920             /* Update vectorial force */
1921             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1922             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1923             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1924             
1925             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1926             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1927             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1928
1929             /**************************
1930              * CALCULATE INTERACTIONS *
1931              **************************/
1932
1933             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1934
1935             /* EWALD ELECTROSTATICS */
1936
1937             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1938             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
1939             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1940             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1941             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1942
1943             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1944             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1945             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1946
1947             fscal            = felec;
1948
1949             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1950
1951             /* Update vectorial force */
1952             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1953             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1954             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1955             
1956             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1957             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1958             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1959
1960             /**************************
1961              * CALCULATE INTERACTIONS *
1962              **************************/
1963
1964             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1965
1966             /* EWALD ELECTROSTATICS */
1967
1968             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1969             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1970             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1971             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1972             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1973
1974             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1975             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1976             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1977
1978             fscal            = felec;
1979
1980             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1981
1982             /* Update vectorial force */
1983             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1984             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1985             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1986             
1987             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1988             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1989             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1990
1991             /**************************
1992              * CALCULATE INTERACTIONS *
1993              **************************/
1994
1995             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1996
1997             /* EWALD ELECTROSTATICS */
1998
1999             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2000             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
2001             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2002             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2003             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2004
2005             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2006             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2007             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2008
2009             fscal            = felec;
2010
2011             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2012
2013             /* Update vectorial force */
2014             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
2015             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2016             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2017             
2018             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2019             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2020             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2021
2022             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2023
2024             /* Inner loop uses 358 flops */
2025         }
2026
2027         /* End of innermost loop */
2028
2029         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2030                                               f+i_coord_offset,fshift+i_shift_offset);
2031
2032         /* Increment number of inner iterations */
2033         inneriter                  += j_index_end - j_index_start;
2034
2035         /* Outer loop uses 18 flops */
2036     }
2037
2038     /* Increment number of outer iterations */
2039     outeriter        += nri;
2040
2041     /* Update outer/inner flops */
2042
2043     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*358);
2044 }