Remove all unnecessary HAVE_CONFIG_H
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecEw_VdwNone_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: Ewald
52  * VdW interaction:            None
53  * Geometry:                   Water3-Water3
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset0;
80     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwjidx0A,vdwjidx0B;
86     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
103     real             *ewtab;
104     _fjsp_v2r8       itab_tmp;
105     _fjsp_v2r8       dummy_mask,cutoff_mask;
106     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
107     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
108     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
109
110     x                = xx[0];
111     f                = ff[0];
112
113     nri              = nlist->nri;
114     iinr             = nlist->iinr;
115     jindex           = nlist->jindex;
116     jjnr             = nlist->jjnr;
117     shiftidx         = nlist->shift;
118     gid              = nlist->gid;
119     shiftvec         = fr->shift_vec[0];
120     fshift           = fr->fshift[0];
121     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
122     charge           = mdatoms->chargeA;
123
124     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
125     ewtab            = fr->ic->tabq_coul_FDV0;
126     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
127     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
128
129     /* Setup water-specific parameters */
130     inr              = nlist->iinr[0];
131     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
132     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
133     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
134
135     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
136     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
137     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
138     qq00             = _fjsp_mul_v2r8(iq0,jq0);
139     qq01             = _fjsp_mul_v2r8(iq0,jq1);
140     qq02             = _fjsp_mul_v2r8(iq0,jq2);
141     qq10             = _fjsp_mul_v2r8(iq1,jq0);
142     qq11             = _fjsp_mul_v2r8(iq1,jq1);
143     qq12             = _fjsp_mul_v2r8(iq1,jq2);
144     qq20             = _fjsp_mul_v2r8(iq2,jq0);
145     qq21             = _fjsp_mul_v2r8(iq2,jq1);
146     qq22             = _fjsp_mul_v2r8(iq2,jq2);
147
148     /* Avoid stupid compiler warnings */
149     jnrA = jnrB = 0;
150     j_coord_offsetA = 0;
151     j_coord_offsetB = 0;
152
153     outeriter        = 0;
154     inneriter        = 0;
155
156     /* Start outer loop over neighborlists */
157     for(iidx=0; iidx<nri; iidx++)
158     {
159         /* Load shift vector for this list */
160         i_shift_offset   = DIM*shiftidx[iidx];
161
162         /* Load limits for loop over neighbors */
163         j_index_start    = jindex[iidx];
164         j_index_end      = jindex[iidx+1];
165
166         /* Get outer coordinate index */
167         inr              = iinr[iidx];
168         i_coord_offset   = DIM*inr;
169
170         /* Load i particle coords and add shift vector */
171         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
172                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
173
174         fix0             = _fjsp_setzero_v2r8();
175         fiy0             = _fjsp_setzero_v2r8();
176         fiz0             = _fjsp_setzero_v2r8();
177         fix1             = _fjsp_setzero_v2r8();
178         fiy1             = _fjsp_setzero_v2r8();
179         fiz1             = _fjsp_setzero_v2r8();
180         fix2             = _fjsp_setzero_v2r8();
181         fiy2             = _fjsp_setzero_v2r8();
182         fiz2             = _fjsp_setzero_v2r8();
183
184         /* Reset potential sums */
185         velecsum         = _fjsp_setzero_v2r8();
186
187         /* Start inner kernel loop */
188         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
189         {
190
191             /* Get j neighbor index, and coordinate index */
192             jnrA             = jjnr[jidx];
193             jnrB             = jjnr[jidx+1];
194             j_coord_offsetA  = DIM*jnrA;
195             j_coord_offsetB  = DIM*jnrB;
196
197             /* load j atom coordinates */
198             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
199                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
200
201             /* Calculate displacement vector */
202             dx00             = _fjsp_sub_v2r8(ix0,jx0);
203             dy00             = _fjsp_sub_v2r8(iy0,jy0);
204             dz00             = _fjsp_sub_v2r8(iz0,jz0);
205             dx01             = _fjsp_sub_v2r8(ix0,jx1);
206             dy01             = _fjsp_sub_v2r8(iy0,jy1);
207             dz01             = _fjsp_sub_v2r8(iz0,jz1);
208             dx02             = _fjsp_sub_v2r8(ix0,jx2);
209             dy02             = _fjsp_sub_v2r8(iy0,jy2);
210             dz02             = _fjsp_sub_v2r8(iz0,jz2);
211             dx10             = _fjsp_sub_v2r8(ix1,jx0);
212             dy10             = _fjsp_sub_v2r8(iy1,jy0);
213             dz10             = _fjsp_sub_v2r8(iz1,jz0);
214             dx11             = _fjsp_sub_v2r8(ix1,jx1);
215             dy11             = _fjsp_sub_v2r8(iy1,jy1);
216             dz11             = _fjsp_sub_v2r8(iz1,jz1);
217             dx12             = _fjsp_sub_v2r8(ix1,jx2);
218             dy12             = _fjsp_sub_v2r8(iy1,jy2);
219             dz12             = _fjsp_sub_v2r8(iz1,jz2);
220             dx20             = _fjsp_sub_v2r8(ix2,jx0);
221             dy20             = _fjsp_sub_v2r8(iy2,jy0);
222             dz20             = _fjsp_sub_v2r8(iz2,jz0);
223             dx21             = _fjsp_sub_v2r8(ix2,jx1);
224             dy21             = _fjsp_sub_v2r8(iy2,jy1);
225             dz21             = _fjsp_sub_v2r8(iz2,jz1);
226             dx22             = _fjsp_sub_v2r8(ix2,jx2);
227             dy22             = _fjsp_sub_v2r8(iy2,jy2);
228             dz22             = _fjsp_sub_v2r8(iz2,jz2);
229
230             /* Calculate squared distance and things based on it */
231             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
232             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
233             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
234             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
235             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
236             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
237             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
238             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
239             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
240
241             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
242             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
243             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
244             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
245             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
246             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
247             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
248             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
249             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
250
251             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
252             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
253             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
254             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
255             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
256             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
257             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
258             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
259             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
260
261             fjx0             = _fjsp_setzero_v2r8();
262             fjy0             = _fjsp_setzero_v2r8();
263             fjz0             = _fjsp_setzero_v2r8();
264             fjx1             = _fjsp_setzero_v2r8();
265             fjy1             = _fjsp_setzero_v2r8();
266             fjz1             = _fjsp_setzero_v2r8();
267             fjx2             = _fjsp_setzero_v2r8();
268             fjy2             = _fjsp_setzero_v2r8();
269             fjz2             = _fjsp_setzero_v2r8();
270
271             /**************************
272              * CALCULATE INTERACTIONS *
273              **************************/
274
275             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
276
277             /* EWALD ELECTROSTATICS */
278
279             /* Calculate Ewald table index by multiplying r with the table scale and truncating to integer */
280             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
281             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
282             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
283             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
284
285             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
286             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
287             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
288             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
289             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
290             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
291             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
292             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
293             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
294             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
295
296             /* Update potential sum for this i atom from the interaction with this j atom. */
297             velecsum         = _fjsp_add_v2r8(velecsum,velec);
298
299             fscal            = felec;
300
301             /* Update vectorial force */
302             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
303             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
304             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
305             
306             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
307             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
308             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
309
310             /**************************
311              * CALCULATE INTERACTIONS *
312              **************************/
313
314             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
315
316             /* EWALD ELECTROSTATICS */
317
318             /* Calculate Ewald table index by multiplying r with the table scale and truncating to integer */
319             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
320             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
321             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
322             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
323
324             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
325             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
326             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
327             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
328             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
329             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
330             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
331             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
332             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
333             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
334
335             /* Update potential sum for this i atom from the interaction with this j atom. */
336             velecsum         = _fjsp_add_v2r8(velecsum,velec);
337
338             fscal            = felec;
339
340             /* Update vectorial force */
341             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
342             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
343             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
344             
345             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
346             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
347             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
348
349             /**************************
350              * CALCULATE INTERACTIONS *
351              **************************/
352
353             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
354
355             /* EWALD ELECTROSTATICS */
356
357             /* Calculate Ewald table index by multiplying r with the table scale and truncating to integer */
358             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
359             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
360             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
361             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
362
363             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
364             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
365             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
366             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
367             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
368             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
369             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
370             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
371             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
372             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
373
374             /* Update potential sum for this i atom from the interaction with this j atom. */
375             velecsum         = _fjsp_add_v2r8(velecsum,velec);
376
377             fscal            = felec;
378
379             /* Update vectorial force */
380             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
381             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
382             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
383             
384             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
385             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
386             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
387
388             /**************************
389              * CALCULATE INTERACTIONS *
390              **************************/
391
392             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
393
394             /* EWALD ELECTROSTATICS */
395
396             /* Calculate Ewald table index by multiplying r with the table scale and truncating to integer */
397             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
398             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
399             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
400             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
401
402             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
403             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
404             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
405             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
406             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
407             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
408             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
409             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
410             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
411             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
412
413             /* Update potential sum for this i atom from the interaction with this j atom. */
414             velecsum         = _fjsp_add_v2r8(velecsum,velec);
415
416             fscal            = felec;
417
418             /* Update vectorial force */
419             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
420             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
421             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
422             
423             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
424             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
425             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
426
427             /**************************
428              * CALCULATE INTERACTIONS *
429              **************************/
430
431             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
432
433             /* EWALD ELECTROSTATICS */
434
435             /* Calculate Ewald table index by multiplying r with the table scale and truncating to integer */
436             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
437             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
438             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
439             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
440
441             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
442             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
443             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
444             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
445             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
446             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
447             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
448             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
449             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
450             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
451
452             /* Update potential sum for this i atom from the interaction with this j atom. */
453             velecsum         = _fjsp_add_v2r8(velecsum,velec);
454
455             fscal            = felec;
456
457             /* Update vectorial force */
458             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
459             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
460             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
461             
462             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
463             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
464             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
465
466             /**************************
467              * CALCULATE INTERACTIONS *
468              **************************/
469
470             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
471
472             /* EWALD ELECTROSTATICS */
473
474             /* Calculate Ewald table index by multiplying r with the table scale and truncating to integer */
475             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
476             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
477             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
478             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
479
480             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
481             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
482             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
483             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
484             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
485             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
486             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
487             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
488             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
489             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
490
491             /* Update potential sum for this i atom from the interaction with this j atom. */
492             velecsum         = _fjsp_add_v2r8(velecsum,velec);
493
494             fscal            = felec;
495
496             /* Update vectorial force */
497             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
498             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
499             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
500             
501             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
502             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
503             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
504
505             /**************************
506              * CALCULATE INTERACTIONS *
507              **************************/
508
509             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
510
511             /* EWALD ELECTROSTATICS */
512
513             /* Calculate Ewald table index by multiplying r with the table scale and truncating to integer */
514             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
515             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
516             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
517             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
518
519             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
520             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
521             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
522             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
523             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
524             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
525             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
526             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
527             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
528             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
529
530             /* Update potential sum for this i atom from the interaction with this j atom. */
531             velecsum         = _fjsp_add_v2r8(velecsum,velec);
532
533             fscal            = felec;
534
535             /* Update vectorial force */
536             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
537             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
538             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
539             
540             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
541             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
542             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
543
544             /**************************
545              * CALCULATE INTERACTIONS *
546              **************************/
547
548             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
549
550             /* EWALD ELECTROSTATICS */
551
552             /* Calculate Ewald table index by multiplying r with the table scale and truncating to integer */
553             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
554             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
555             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
556             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
557
558             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
559             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
560             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
561             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
562             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
563             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
564             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
565             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
566             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
567             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
568
569             /* Update potential sum for this i atom from the interaction with this j atom. */
570             velecsum         = _fjsp_add_v2r8(velecsum,velec);
571
572             fscal            = felec;
573
574             /* Update vectorial force */
575             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
576             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
577             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
578             
579             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
580             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
581             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
582
583             /**************************
584              * CALCULATE INTERACTIONS *
585              **************************/
586
587             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
588
589             /* EWALD ELECTROSTATICS */
590
591             /* Calculate Ewald table index by multiplying r with the table scale and truncating to integer */
592             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
593             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
594             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
595             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
596
597             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
598             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
599             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
600             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
601             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
602             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
603             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
604             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
605             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
606             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
607
608             /* Update potential sum for this i atom from the interaction with this j atom. */
609             velecsum         = _fjsp_add_v2r8(velecsum,velec);
610
611             fscal            = felec;
612
613             /* Update vectorial force */
614             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
615             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
616             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
617             
618             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
619             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
620             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
621
622             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
623
624             /* Inner loop uses 396 flops */
625         }
626
627         if(jidx<j_index_end)
628         {
629
630             jnrA             = jjnr[jidx];
631             j_coord_offsetA  = DIM*jnrA;
632
633             /* load j atom coordinates */
634             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
635                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
636
637             /* Calculate displacement vector */
638             dx00             = _fjsp_sub_v2r8(ix0,jx0);
639             dy00             = _fjsp_sub_v2r8(iy0,jy0);
640             dz00             = _fjsp_sub_v2r8(iz0,jz0);
641             dx01             = _fjsp_sub_v2r8(ix0,jx1);
642             dy01             = _fjsp_sub_v2r8(iy0,jy1);
643             dz01             = _fjsp_sub_v2r8(iz0,jz1);
644             dx02             = _fjsp_sub_v2r8(ix0,jx2);
645             dy02             = _fjsp_sub_v2r8(iy0,jy2);
646             dz02             = _fjsp_sub_v2r8(iz0,jz2);
647             dx10             = _fjsp_sub_v2r8(ix1,jx0);
648             dy10             = _fjsp_sub_v2r8(iy1,jy0);
649             dz10             = _fjsp_sub_v2r8(iz1,jz0);
650             dx11             = _fjsp_sub_v2r8(ix1,jx1);
651             dy11             = _fjsp_sub_v2r8(iy1,jy1);
652             dz11             = _fjsp_sub_v2r8(iz1,jz1);
653             dx12             = _fjsp_sub_v2r8(ix1,jx2);
654             dy12             = _fjsp_sub_v2r8(iy1,jy2);
655             dz12             = _fjsp_sub_v2r8(iz1,jz2);
656             dx20             = _fjsp_sub_v2r8(ix2,jx0);
657             dy20             = _fjsp_sub_v2r8(iy2,jy0);
658             dz20             = _fjsp_sub_v2r8(iz2,jz0);
659             dx21             = _fjsp_sub_v2r8(ix2,jx1);
660             dy21             = _fjsp_sub_v2r8(iy2,jy1);
661             dz21             = _fjsp_sub_v2r8(iz2,jz1);
662             dx22             = _fjsp_sub_v2r8(ix2,jx2);
663             dy22             = _fjsp_sub_v2r8(iy2,jy2);
664             dz22             = _fjsp_sub_v2r8(iz2,jz2);
665
666             /* Calculate squared distance and things based on it */
667             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
668             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
669             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
670             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
671             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
672             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
673             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
674             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
675             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
676
677             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
678             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
679             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
680             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
681             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
682             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
683             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
684             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
685             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
686
687             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
688             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
689             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
690             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
691             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
692             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
693             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
694             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
695             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
696
697             fjx0             = _fjsp_setzero_v2r8();
698             fjy0             = _fjsp_setzero_v2r8();
699             fjz0             = _fjsp_setzero_v2r8();
700             fjx1             = _fjsp_setzero_v2r8();
701             fjy1             = _fjsp_setzero_v2r8();
702             fjz1             = _fjsp_setzero_v2r8();
703             fjx2             = _fjsp_setzero_v2r8();
704             fjy2             = _fjsp_setzero_v2r8();
705             fjz2             = _fjsp_setzero_v2r8();
706
707             /**************************
708              * CALCULATE INTERACTIONS *
709              **************************/
710
711             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
712
713             /* EWALD ELECTROSTATICS */
714
715             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
716             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
717             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
718             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
719             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
720
721             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
722             ewtabD           = _fjsp_setzero_v2r8();
723             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
724             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
725             ewtabFn          = _fjsp_setzero_v2r8();
726             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
727             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
728             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
729             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
730             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
731
732             /* Update potential sum for this i atom from the interaction with this j atom. */
733             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
734             velecsum         = _fjsp_add_v2r8(velecsum,velec);
735
736             fscal            = felec;
737
738             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
739
740             /* Update vectorial force */
741             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
742             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
743             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
744             
745             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
746             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
747             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
748
749             /**************************
750              * CALCULATE INTERACTIONS *
751              **************************/
752
753             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
754
755             /* EWALD ELECTROSTATICS */
756
757             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
758             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
759             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
760             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
761             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
762
763             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
764             ewtabD           = _fjsp_setzero_v2r8();
765             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
766             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
767             ewtabFn          = _fjsp_setzero_v2r8();
768             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
769             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
770             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
771             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
772             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
773
774             /* Update potential sum for this i atom from the interaction with this j atom. */
775             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
776             velecsum         = _fjsp_add_v2r8(velecsum,velec);
777
778             fscal            = felec;
779
780             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
781
782             /* Update vectorial force */
783             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
784             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
785             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
786             
787             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
788             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
789             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
790
791             /**************************
792              * CALCULATE INTERACTIONS *
793              **************************/
794
795             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
796
797             /* EWALD ELECTROSTATICS */
798
799             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
800             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
801             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
802             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
803             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
804
805             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
806             ewtabD           = _fjsp_setzero_v2r8();
807             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
808             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
809             ewtabFn          = _fjsp_setzero_v2r8();
810             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
811             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
812             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
813             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
814             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
815
816             /* Update potential sum for this i atom from the interaction with this j atom. */
817             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
818             velecsum         = _fjsp_add_v2r8(velecsum,velec);
819
820             fscal            = felec;
821
822             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
823
824             /* Update vectorial force */
825             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
826             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
827             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
828             
829             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
830             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
831             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
832
833             /**************************
834              * CALCULATE INTERACTIONS *
835              **************************/
836
837             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
838
839             /* EWALD ELECTROSTATICS */
840
841             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
842             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
843             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
844             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
845             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
846
847             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
848             ewtabD           = _fjsp_setzero_v2r8();
849             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
850             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
851             ewtabFn          = _fjsp_setzero_v2r8();
852             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
853             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
854             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
855             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
856             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
857
858             /* Update potential sum for this i atom from the interaction with this j atom. */
859             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
860             velecsum         = _fjsp_add_v2r8(velecsum,velec);
861
862             fscal            = felec;
863
864             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
865
866             /* Update vectorial force */
867             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
868             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
869             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
870             
871             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
872             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
873             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
874
875             /**************************
876              * CALCULATE INTERACTIONS *
877              **************************/
878
879             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
880
881             /* EWALD ELECTROSTATICS */
882
883             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
884             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
885             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
886             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
887             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
888
889             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
890             ewtabD           = _fjsp_setzero_v2r8();
891             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
892             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
893             ewtabFn          = _fjsp_setzero_v2r8();
894             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
895             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
896             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
897             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
898             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
899
900             /* Update potential sum for this i atom from the interaction with this j atom. */
901             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
902             velecsum         = _fjsp_add_v2r8(velecsum,velec);
903
904             fscal            = felec;
905
906             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
907
908             /* Update vectorial force */
909             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
910             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
911             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
912             
913             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
914             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
915             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
916
917             /**************************
918              * CALCULATE INTERACTIONS *
919              **************************/
920
921             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
922
923             /* EWALD ELECTROSTATICS */
924
925             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
926             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
927             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
928             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
929             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
930
931             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
932             ewtabD           = _fjsp_setzero_v2r8();
933             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
934             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
935             ewtabFn          = _fjsp_setzero_v2r8();
936             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
937             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
938             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
939             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
940             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
941
942             /* Update potential sum for this i atom from the interaction with this j atom. */
943             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
944             velecsum         = _fjsp_add_v2r8(velecsum,velec);
945
946             fscal            = felec;
947
948             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
949
950             /* Update vectorial force */
951             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
952             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
953             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
954             
955             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
956             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
957             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
958
959             /**************************
960              * CALCULATE INTERACTIONS *
961              **************************/
962
963             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
964
965             /* EWALD ELECTROSTATICS */
966
967             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
968             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
969             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
970             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
971             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
972
973             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
974             ewtabD           = _fjsp_setzero_v2r8();
975             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
976             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
977             ewtabFn          = _fjsp_setzero_v2r8();
978             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
979             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
980             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
981             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
982             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
983
984             /* Update potential sum for this i atom from the interaction with this j atom. */
985             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
986             velecsum         = _fjsp_add_v2r8(velecsum,velec);
987
988             fscal            = felec;
989
990             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
991
992             /* Update vectorial force */
993             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
994             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
995             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
996             
997             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
998             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
999             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1000
1001             /**************************
1002              * CALCULATE INTERACTIONS *
1003              **************************/
1004
1005             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1006
1007             /* EWALD ELECTROSTATICS */
1008
1009             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1010             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1011             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1012             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1013             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1014
1015             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1016             ewtabD           = _fjsp_setzero_v2r8();
1017             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1018             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1019             ewtabFn          = _fjsp_setzero_v2r8();
1020             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1021             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1022             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1023             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
1024             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1025
1026             /* Update potential sum for this i atom from the interaction with this j atom. */
1027             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1028             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1029
1030             fscal            = felec;
1031
1032             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1033
1034             /* Update vectorial force */
1035             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1036             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1037             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1038             
1039             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1040             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1041             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1042
1043             /**************************
1044              * CALCULATE INTERACTIONS *
1045              **************************/
1046
1047             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1048
1049             /* EWALD ELECTROSTATICS */
1050
1051             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1052             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1053             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1054             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1055             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1056
1057             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1058             ewtabD           = _fjsp_setzero_v2r8();
1059             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1060             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1061             ewtabFn          = _fjsp_setzero_v2r8();
1062             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1063             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1064             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1065             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
1066             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1067
1068             /* Update potential sum for this i atom from the interaction with this j atom. */
1069             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1070             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1071
1072             fscal            = felec;
1073
1074             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1075
1076             /* Update vectorial force */
1077             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1078             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1079             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1080             
1081             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1082             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1083             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1084
1085             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1086
1087             /* Inner loop uses 396 flops */
1088         }
1089
1090         /* End of innermost loop */
1091
1092         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1093                                               f+i_coord_offset,fshift+i_shift_offset);
1094
1095         ggid                        = gid[iidx];
1096         /* Update potential energies */
1097         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1098
1099         /* Increment number of inner iterations */
1100         inneriter                  += j_index_end - j_index_start;
1101
1102         /* Outer loop uses 19 flops */
1103     }
1104
1105     /* Increment number of outer iterations */
1106     outeriter        += nri;
1107
1108     /* Update outer/inner flops */
1109
1110     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*396);
1111 }
1112 /*
1113  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
1114  * Electrostatics interaction: Ewald
1115  * VdW interaction:            None
1116  * Geometry:                   Water3-Water3
1117  * Calculate force/pot:        Force
1118  */
1119 void
1120 nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
1121                     (t_nblist                    * gmx_restrict       nlist,
1122                      rvec                        * gmx_restrict          xx,
1123                      rvec                        * gmx_restrict          ff,
1124                      t_forcerec                  * gmx_restrict          fr,
1125                      t_mdatoms                   * gmx_restrict     mdatoms,
1126                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1127                      t_nrnb                      * gmx_restrict        nrnb)
1128 {
1129     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1130      * just 0 for non-waters.
1131      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1132      * jnr indices corresponding to data put in the two positions in the SIMD register.
1133      */
1134     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1135     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1136     int              jnrA,jnrB;
1137     int              j_coord_offsetA,j_coord_offsetB;
1138     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1139     real             rcutoff_scalar;
1140     real             *shiftvec,*fshift,*x,*f;
1141     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1142     int              vdwioffset0;
1143     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1144     int              vdwioffset1;
1145     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1146     int              vdwioffset2;
1147     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1148     int              vdwjidx0A,vdwjidx0B;
1149     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1150     int              vdwjidx1A,vdwjidx1B;
1151     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1152     int              vdwjidx2A,vdwjidx2B;
1153     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1154     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1155     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1156     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1157     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1158     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1159     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1160     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1161     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1162     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1163     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1164     real             *charge;
1165     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1166     real             *ewtab;
1167     _fjsp_v2r8       itab_tmp;
1168     _fjsp_v2r8       dummy_mask,cutoff_mask;
1169     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1170     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1171     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1172
1173     x                = xx[0];
1174     f                = ff[0];
1175
1176     nri              = nlist->nri;
1177     iinr             = nlist->iinr;
1178     jindex           = nlist->jindex;
1179     jjnr             = nlist->jjnr;
1180     shiftidx         = nlist->shift;
1181     gid              = nlist->gid;
1182     shiftvec         = fr->shift_vec[0];
1183     fshift           = fr->fshift[0];
1184     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1185     charge           = mdatoms->chargeA;
1186
1187     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1188     ewtab            = fr->ic->tabq_coul_F;
1189     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1190     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1191
1192     /* Setup water-specific parameters */
1193     inr              = nlist->iinr[0];
1194     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1195     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1196     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1197
1198     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1199     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1200     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1201     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1202     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1203     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1204     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1205     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1206     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1207     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1208     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1209     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1210
1211     /* Avoid stupid compiler warnings */
1212     jnrA = jnrB = 0;
1213     j_coord_offsetA = 0;
1214     j_coord_offsetB = 0;
1215
1216     outeriter        = 0;
1217     inneriter        = 0;
1218
1219     /* Start outer loop over neighborlists */
1220     for(iidx=0; iidx<nri; iidx++)
1221     {
1222         /* Load shift vector for this list */
1223         i_shift_offset   = DIM*shiftidx[iidx];
1224
1225         /* Load limits for loop over neighbors */
1226         j_index_start    = jindex[iidx];
1227         j_index_end      = jindex[iidx+1];
1228
1229         /* Get outer coordinate index */
1230         inr              = iinr[iidx];
1231         i_coord_offset   = DIM*inr;
1232
1233         /* Load i particle coords and add shift vector */
1234         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1235                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1236
1237         fix0             = _fjsp_setzero_v2r8();
1238         fiy0             = _fjsp_setzero_v2r8();
1239         fiz0             = _fjsp_setzero_v2r8();
1240         fix1             = _fjsp_setzero_v2r8();
1241         fiy1             = _fjsp_setzero_v2r8();
1242         fiz1             = _fjsp_setzero_v2r8();
1243         fix2             = _fjsp_setzero_v2r8();
1244         fiy2             = _fjsp_setzero_v2r8();
1245         fiz2             = _fjsp_setzero_v2r8();
1246
1247         /* Start inner kernel loop */
1248         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1249         {
1250
1251             /* Get j neighbor index, and coordinate index */
1252             jnrA             = jjnr[jidx];
1253             jnrB             = jjnr[jidx+1];
1254             j_coord_offsetA  = DIM*jnrA;
1255             j_coord_offsetB  = DIM*jnrB;
1256
1257             /* load j atom coordinates */
1258             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1259                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1260
1261             /* Calculate displacement vector */
1262             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1263             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1264             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1265             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1266             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1267             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1268             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1269             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1270             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1271             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1272             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1273             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1274             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1275             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1276             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1277             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1278             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1279             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1280             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1281             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1282             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1283             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1284             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1285             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1286             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1287             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1288             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1289
1290             /* Calculate squared distance and things based on it */
1291             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1292             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1293             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1294             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1295             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1296             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1297             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1298             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1299             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1300
1301             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1302             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1303             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1304             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1305             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1306             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1307             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1308             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1309             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1310
1311             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1312             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1313             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1314             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1315             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1316             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1317             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1318             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1319             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1320
1321             fjx0             = _fjsp_setzero_v2r8();
1322             fjy0             = _fjsp_setzero_v2r8();
1323             fjz0             = _fjsp_setzero_v2r8();
1324             fjx1             = _fjsp_setzero_v2r8();
1325             fjy1             = _fjsp_setzero_v2r8();
1326             fjz1             = _fjsp_setzero_v2r8();
1327             fjx2             = _fjsp_setzero_v2r8();
1328             fjy2             = _fjsp_setzero_v2r8();
1329             fjz2             = _fjsp_setzero_v2r8();
1330
1331             /**************************
1332              * CALCULATE INTERACTIONS *
1333              **************************/
1334
1335             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1336
1337             /* EWALD ELECTROSTATICS */
1338
1339             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1340             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
1341             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1342             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1343             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1344
1345             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1346                                          &ewtabF,&ewtabFn);
1347             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1348             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1349
1350             fscal            = felec;
1351
1352             /* Update vectorial force */
1353             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1354             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1355             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1356             
1357             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1358             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1359             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1360
1361             /**************************
1362              * CALCULATE INTERACTIONS *
1363              **************************/
1364
1365             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1366
1367             /* EWALD ELECTROSTATICS */
1368
1369             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1370             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
1371             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1372             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1373             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1374
1375             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1376                                          &ewtabF,&ewtabFn);
1377             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1378             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1379
1380             fscal            = felec;
1381
1382             /* Update vectorial force */
1383             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1384             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1385             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1386             
1387             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1388             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1389             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1390
1391             /**************************
1392              * CALCULATE INTERACTIONS *
1393              **************************/
1394
1395             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1396
1397             /* EWALD ELECTROSTATICS */
1398
1399             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1400             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
1401             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1402             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1403             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1404
1405             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1406                                          &ewtabF,&ewtabFn);
1407             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1408             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1409
1410             fscal            = felec;
1411
1412             /* Update vectorial force */
1413             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1414             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1415             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1416             
1417             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1418             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1419             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1420
1421             /**************************
1422              * CALCULATE INTERACTIONS *
1423              **************************/
1424
1425             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1426
1427             /* EWALD ELECTROSTATICS */
1428
1429             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1430             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
1431             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1432             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1433             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1434
1435             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1436                                          &ewtabF,&ewtabFn);
1437             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1438             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1439
1440             fscal            = felec;
1441
1442             /* Update vectorial force */
1443             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1444             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1445             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1446             
1447             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1448             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1449             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1450
1451             /**************************
1452              * CALCULATE INTERACTIONS *
1453              **************************/
1454
1455             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1456
1457             /* EWALD ELECTROSTATICS */
1458
1459             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1460             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1461             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1462             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1463             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1464
1465             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1466                                          &ewtabF,&ewtabFn);
1467             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1468             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1469
1470             fscal            = felec;
1471
1472             /* Update vectorial force */
1473             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1474             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1475             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1476             
1477             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1478             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1479             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1480
1481             /**************************
1482              * CALCULATE INTERACTIONS *
1483              **************************/
1484
1485             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1486
1487             /* EWALD ELECTROSTATICS */
1488
1489             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1490             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1491             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1492             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1493             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1494
1495             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1496                                          &ewtabF,&ewtabFn);
1497             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1498             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1499
1500             fscal            = felec;
1501
1502             /* Update vectorial force */
1503             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1504             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1505             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1506             
1507             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1508             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1509             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1510
1511             /**************************
1512              * CALCULATE INTERACTIONS *
1513              **************************/
1514
1515             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1516
1517             /* EWALD ELECTROSTATICS */
1518
1519             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1520             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
1521             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1522             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1523             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1524
1525             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1526                                          &ewtabF,&ewtabFn);
1527             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1528             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1529
1530             fscal            = felec;
1531
1532             /* Update vectorial force */
1533             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1534             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1535             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1536             
1537             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1538             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1539             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1540
1541             /**************************
1542              * CALCULATE INTERACTIONS *
1543              **************************/
1544
1545             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1546
1547             /* EWALD ELECTROSTATICS */
1548
1549             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1550             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1551             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1552             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1553             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1554
1555             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1556                                          &ewtabF,&ewtabFn);
1557             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1558             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1559
1560             fscal            = felec;
1561
1562             /* Update vectorial force */
1563             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1564             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1565             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1566             
1567             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1568             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1569             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1570
1571             /**************************
1572              * CALCULATE INTERACTIONS *
1573              **************************/
1574
1575             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1576
1577             /* EWALD ELECTROSTATICS */
1578
1579             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1580             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1581             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1582             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1583             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1584
1585             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1586                                          &ewtabF,&ewtabFn);
1587             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1588             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1589
1590             fscal            = felec;
1591
1592             /* Update vectorial force */
1593             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1594             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1595             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1596             
1597             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1598             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1599             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1600
1601             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1602
1603             /* Inner loop uses 351 flops */
1604         }
1605
1606         if(jidx<j_index_end)
1607         {
1608
1609             jnrA             = jjnr[jidx];
1610             j_coord_offsetA  = DIM*jnrA;
1611
1612             /* load j atom coordinates */
1613             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1614                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1615
1616             /* Calculate displacement vector */
1617             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1618             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1619             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1620             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1621             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1622             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1623             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1624             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1625             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1626             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1627             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1628             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1629             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1630             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1631             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1632             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1633             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1634             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1635             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1636             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1637             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1638             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1639             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1640             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1641             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1642             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1643             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1644
1645             /* Calculate squared distance and things based on it */
1646             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1647             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1648             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1649             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1650             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1651             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1652             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1653             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1654             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1655
1656             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1657             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1658             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1659             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1660             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1661             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1662             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1663             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1664             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1665
1666             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1667             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1668             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1669             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1670             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1671             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1672             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1673             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1674             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1675
1676             fjx0             = _fjsp_setzero_v2r8();
1677             fjy0             = _fjsp_setzero_v2r8();
1678             fjz0             = _fjsp_setzero_v2r8();
1679             fjx1             = _fjsp_setzero_v2r8();
1680             fjy1             = _fjsp_setzero_v2r8();
1681             fjz1             = _fjsp_setzero_v2r8();
1682             fjx2             = _fjsp_setzero_v2r8();
1683             fjy2             = _fjsp_setzero_v2r8();
1684             fjz2             = _fjsp_setzero_v2r8();
1685
1686             /**************************
1687              * CALCULATE INTERACTIONS *
1688              **************************/
1689
1690             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1691
1692             /* EWALD ELECTROSTATICS */
1693
1694             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1695             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
1696             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1697             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1698             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1699
1700             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1701             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1702             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1703
1704             fscal            = felec;
1705
1706             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1707
1708             /* Update vectorial force */
1709             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1710             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1711             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1712             
1713             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1714             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1715             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1716
1717             /**************************
1718              * CALCULATE INTERACTIONS *
1719              **************************/
1720
1721             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1722
1723             /* EWALD ELECTROSTATICS */
1724
1725             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1726             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
1727             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1728             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1729             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1730
1731             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1732             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1733             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1734
1735             fscal            = felec;
1736
1737             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1738
1739             /* Update vectorial force */
1740             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1741             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1742             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1743             
1744             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1745             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1746             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1747
1748             /**************************
1749              * CALCULATE INTERACTIONS *
1750              **************************/
1751
1752             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1753
1754             /* EWALD ELECTROSTATICS */
1755
1756             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1757             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
1758             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1759             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1760             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1761
1762             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1763             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1764             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1765
1766             fscal            = felec;
1767
1768             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1769
1770             /* Update vectorial force */
1771             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1772             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1773             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1774             
1775             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1776             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1777             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1778
1779             /**************************
1780              * CALCULATE INTERACTIONS *
1781              **************************/
1782
1783             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1784
1785             /* EWALD ELECTROSTATICS */
1786
1787             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1788             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
1789             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1790             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1791             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1792
1793             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1794             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1795             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1796
1797             fscal            = felec;
1798
1799             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1800
1801             /* Update vectorial force */
1802             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1803             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1804             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1805             
1806             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1807             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1808             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1809
1810             /**************************
1811              * CALCULATE INTERACTIONS *
1812              **************************/
1813
1814             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1815
1816             /* EWALD ELECTROSTATICS */
1817
1818             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1819             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1820             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1821             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1822             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1823
1824             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1825             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1826             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1827
1828             fscal            = felec;
1829
1830             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1831
1832             /* Update vectorial force */
1833             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1834             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1835             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1836             
1837             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1838             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1839             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1840
1841             /**************************
1842              * CALCULATE INTERACTIONS *
1843              **************************/
1844
1845             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1846
1847             /* EWALD ELECTROSTATICS */
1848
1849             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1850             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1851             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1852             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1853             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1854
1855             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1856             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1857             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1858
1859             fscal            = felec;
1860
1861             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1862
1863             /* Update vectorial force */
1864             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1865             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1866             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1867             
1868             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1869             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1870             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1871
1872             /**************************
1873              * CALCULATE INTERACTIONS *
1874              **************************/
1875
1876             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1877
1878             /* EWALD ELECTROSTATICS */
1879
1880             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1881             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
1882             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1883             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1884             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1885
1886             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1887             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1888             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1889
1890             fscal            = felec;
1891
1892             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1893
1894             /* Update vectorial force */
1895             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1896             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1897             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1898             
1899             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1900             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1901             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1902
1903             /**************************
1904              * CALCULATE INTERACTIONS *
1905              **************************/
1906
1907             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1908
1909             /* EWALD ELECTROSTATICS */
1910
1911             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1912             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1913             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1914             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1915             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1916
1917             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1918             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1919             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1920
1921             fscal            = felec;
1922
1923             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1924
1925             /* Update vectorial force */
1926             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1927             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1928             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1929             
1930             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1931             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1932             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1933
1934             /**************************
1935              * CALCULATE INTERACTIONS *
1936              **************************/
1937
1938             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1939
1940             /* EWALD ELECTROSTATICS */
1941
1942             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1943             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1944             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1945             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1946             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1947
1948             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1949             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1950             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1951
1952             fscal            = felec;
1953
1954             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1955
1956             /* Update vectorial force */
1957             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1958             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1959             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1960             
1961             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1962             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1963             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1964
1965             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1966
1967             /* Inner loop uses 351 flops */
1968         }
1969
1970         /* End of innermost loop */
1971
1972         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1973                                               f+i_coord_offset,fshift+i_shift_offset);
1974
1975         /* Increment number of inner iterations */
1976         inneriter                  += j_index_end - j_index_start;
1977
1978         /* Outer loop uses 18 flops */
1979     }
1980
1981     /* Increment number of outer iterations */
1982     outeriter        += nri;
1983
1984     /* Update outer/inner flops */
1985
1986     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*351);
1987 }