Remove all unnecessary HAVE_CONFIG_H
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecEwSh_VdwNone_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: Ewald
52  * VdW interaction:            None
53  * Geometry:                   Water3-Water3
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset0;
80     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwjidx0A,vdwjidx0B;
86     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
103     real             *ewtab;
104     _fjsp_v2r8       itab_tmp;
105     _fjsp_v2r8       dummy_mask,cutoff_mask;
106     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
107     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
108     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
109
110     x                = xx[0];
111     f                = ff[0];
112
113     nri              = nlist->nri;
114     iinr             = nlist->iinr;
115     jindex           = nlist->jindex;
116     jjnr             = nlist->jjnr;
117     shiftidx         = nlist->shift;
118     gid              = nlist->gid;
119     shiftvec         = fr->shift_vec[0];
120     fshift           = fr->fshift[0];
121     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
122     charge           = mdatoms->chargeA;
123
124     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
125     ewtab            = fr->ic->tabq_coul_FDV0;
126     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
127     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
128
129     /* Setup water-specific parameters */
130     inr              = nlist->iinr[0];
131     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
132     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
133     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
134
135     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
136     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
137     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
138     qq00             = _fjsp_mul_v2r8(iq0,jq0);
139     qq01             = _fjsp_mul_v2r8(iq0,jq1);
140     qq02             = _fjsp_mul_v2r8(iq0,jq2);
141     qq10             = _fjsp_mul_v2r8(iq1,jq0);
142     qq11             = _fjsp_mul_v2r8(iq1,jq1);
143     qq12             = _fjsp_mul_v2r8(iq1,jq2);
144     qq20             = _fjsp_mul_v2r8(iq2,jq0);
145     qq21             = _fjsp_mul_v2r8(iq2,jq1);
146     qq22             = _fjsp_mul_v2r8(iq2,jq2);
147
148     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
149     rcutoff_scalar   = fr->rcoulomb;
150     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
151     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
152
153     /* Avoid stupid compiler warnings */
154     jnrA = jnrB = 0;
155     j_coord_offsetA = 0;
156     j_coord_offsetB = 0;
157
158     outeriter        = 0;
159     inneriter        = 0;
160
161     /* Start outer loop over neighborlists */
162     for(iidx=0; iidx<nri; iidx++)
163     {
164         /* Load shift vector for this list */
165         i_shift_offset   = DIM*shiftidx[iidx];
166
167         /* Load limits for loop over neighbors */
168         j_index_start    = jindex[iidx];
169         j_index_end      = jindex[iidx+1];
170
171         /* Get outer coordinate index */
172         inr              = iinr[iidx];
173         i_coord_offset   = DIM*inr;
174
175         /* Load i particle coords and add shift vector */
176         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
177                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
178
179         fix0             = _fjsp_setzero_v2r8();
180         fiy0             = _fjsp_setzero_v2r8();
181         fiz0             = _fjsp_setzero_v2r8();
182         fix1             = _fjsp_setzero_v2r8();
183         fiy1             = _fjsp_setzero_v2r8();
184         fiz1             = _fjsp_setzero_v2r8();
185         fix2             = _fjsp_setzero_v2r8();
186         fiy2             = _fjsp_setzero_v2r8();
187         fiz2             = _fjsp_setzero_v2r8();
188
189         /* Reset potential sums */
190         velecsum         = _fjsp_setzero_v2r8();
191
192         /* Start inner kernel loop */
193         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
194         {
195
196             /* Get j neighbor index, and coordinate index */
197             jnrA             = jjnr[jidx];
198             jnrB             = jjnr[jidx+1];
199             j_coord_offsetA  = DIM*jnrA;
200             j_coord_offsetB  = DIM*jnrB;
201
202             /* load j atom coordinates */
203             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
204                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
205
206             /* Calculate displacement vector */
207             dx00             = _fjsp_sub_v2r8(ix0,jx0);
208             dy00             = _fjsp_sub_v2r8(iy0,jy0);
209             dz00             = _fjsp_sub_v2r8(iz0,jz0);
210             dx01             = _fjsp_sub_v2r8(ix0,jx1);
211             dy01             = _fjsp_sub_v2r8(iy0,jy1);
212             dz01             = _fjsp_sub_v2r8(iz0,jz1);
213             dx02             = _fjsp_sub_v2r8(ix0,jx2);
214             dy02             = _fjsp_sub_v2r8(iy0,jy2);
215             dz02             = _fjsp_sub_v2r8(iz0,jz2);
216             dx10             = _fjsp_sub_v2r8(ix1,jx0);
217             dy10             = _fjsp_sub_v2r8(iy1,jy0);
218             dz10             = _fjsp_sub_v2r8(iz1,jz0);
219             dx11             = _fjsp_sub_v2r8(ix1,jx1);
220             dy11             = _fjsp_sub_v2r8(iy1,jy1);
221             dz11             = _fjsp_sub_v2r8(iz1,jz1);
222             dx12             = _fjsp_sub_v2r8(ix1,jx2);
223             dy12             = _fjsp_sub_v2r8(iy1,jy2);
224             dz12             = _fjsp_sub_v2r8(iz1,jz2);
225             dx20             = _fjsp_sub_v2r8(ix2,jx0);
226             dy20             = _fjsp_sub_v2r8(iy2,jy0);
227             dz20             = _fjsp_sub_v2r8(iz2,jz0);
228             dx21             = _fjsp_sub_v2r8(ix2,jx1);
229             dy21             = _fjsp_sub_v2r8(iy2,jy1);
230             dz21             = _fjsp_sub_v2r8(iz2,jz1);
231             dx22             = _fjsp_sub_v2r8(ix2,jx2);
232             dy22             = _fjsp_sub_v2r8(iy2,jy2);
233             dz22             = _fjsp_sub_v2r8(iz2,jz2);
234
235             /* Calculate squared distance and things based on it */
236             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
237             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
238             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
239             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
240             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
241             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
242             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
243             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
244             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
245
246             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
247             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
248             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
249             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
250             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
251             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
252             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
253             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
254             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
255
256             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
257             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
258             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
259             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
260             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
261             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
262             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
263             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
264             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
265
266             fjx0             = _fjsp_setzero_v2r8();
267             fjy0             = _fjsp_setzero_v2r8();
268             fjz0             = _fjsp_setzero_v2r8();
269             fjx1             = _fjsp_setzero_v2r8();
270             fjy1             = _fjsp_setzero_v2r8();
271             fjz1             = _fjsp_setzero_v2r8();
272             fjx2             = _fjsp_setzero_v2r8();
273             fjy2             = _fjsp_setzero_v2r8();
274             fjz2             = _fjsp_setzero_v2r8();
275
276             /**************************
277              * CALCULATE INTERACTIONS *
278              **************************/
279
280             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
281             {
282
283             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
284
285             /* EWALD ELECTROSTATICS */
286
287             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
288             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
289             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
290             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
291             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
292
293             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
294             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
295             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
296             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
297             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
298             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
299             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
300             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
301             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
302             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
303
304             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
305
306             /* Update potential sum for this i atom from the interaction with this j atom. */
307             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
308             velecsum         = _fjsp_add_v2r8(velecsum,velec);
309
310             fscal            = felec;
311
312             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
313
314             /* Update vectorial force */
315             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
316             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
317             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
318             
319             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
320             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
321             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
322
323             }
324
325             /**************************
326              * CALCULATE INTERACTIONS *
327              **************************/
328
329             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
330             {
331
332             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
333
334             /* EWALD ELECTROSTATICS */
335
336             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
337             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
338             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
339             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
340             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
341
342             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
343             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
344             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
345             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
346             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
347             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
348             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
349             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
350             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
351             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
352
353             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
354
355             /* Update potential sum for this i atom from the interaction with this j atom. */
356             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
357             velecsum         = _fjsp_add_v2r8(velecsum,velec);
358
359             fscal            = felec;
360
361             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
362
363             /* Update vectorial force */
364             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
365             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
366             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
367             
368             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
369             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
370             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
371
372             }
373
374             /**************************
375              * CALCULATE INTERACTIONS *
376              **************************/
377
378             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
379             {
380
381             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
382
383             /* EWALD ELECTROSTATICS */
384
385             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
386             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
387             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
388             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
389             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
390
391             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
392             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
393             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
394             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
395             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
396             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
397             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
398             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
399             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
400             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
401
402             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
403
404             /* Update potential sum for this i atom from the interaction with this j atom. */
405             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
406             velecsum         = _fjsp_add_v2r8(velecsum,velec);
407
408             fscal            = felec;
409
410             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
411
412             /* Update vectorial force */
413             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
414             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
415             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
416             
417             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
418             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
419             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
420
421             }
422
423             /**************************
424              * CALCULATE INTERACTIONS *
425              **************************/
426
427             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
428             {
429
430             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
431
432             /* EWALD ELECTROSTATICS */
433
434             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
435             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
436             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
437             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
438             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
439
440             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
441             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
442             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
443             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
444             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
445             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
446             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
447             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
448             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
449             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
450
451             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
452
453             /* Update potential sum for this i atom from the interaction with this j atom. */
454             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
455             velecsum         = _fjsp_add_v2r8(velecsum,velec);
456
457             fscal            = felec;
458
459             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
460
461             /* Update vectorial force */
462             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
463             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
464             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
465             
466             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
467             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
468             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
469
470             }
471
472             /**************************
473              * CALCULATE INTERACTIONS *
474              **************************/
475
476             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
477             {
478
479             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
480
481             /* EWALD ELECTROSTATICS */
482
483             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
484             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
485             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
486             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
487             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
488
489             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
490             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
491             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
492             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
493             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
494             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
495             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
496             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
497             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
498             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
499
500             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
501
502             /* Update potential sum for this i atom from the interaction with this j atom. */
503             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
504             velecsum         = _fjsp_add_v2r8(velecsum,velec);
505
506             fscal            = felec;
507
508             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
509
510             /* Update vectorial force */
511             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
512             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
513             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
514             
515             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
516             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
517             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
518
519             }
520
521             /**************************
522              * CALCULATE INTERACTIONS *
523              **************************/
524
525             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
526             {
527
528             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
529
530             /* EWALD ELECTROSTATICS */
531
532             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
533             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
534             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
535             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
536             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
537
538             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
539             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
540             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
541             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
542             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
543             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
544             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
545             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
546             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
547             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
548
549             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
550
551             /* Update potential sum for this i atom from the interaction with this j atom. */
552             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
553             velecsum         = _fjsp_add_v2r8(velecsum,velec);
554
555             fscal            = felec;
556
557             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
558
559             /* Update vectorial force */
560             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
561             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
562             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
563             
564             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
565             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
566             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
567
568             }
569
570             /**************************
571              * CALCULATE INTERACTIONS *
572              **************************/
573
574             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
575             {
576
577             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
578
579             /* EWALD ELECTROSTATICS */
580
581             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
582             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
583             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
584             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
585             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
586
587             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
588             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
589             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
590             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
591             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
592             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
593             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
594             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
595             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
596             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
597
598             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
599
600             /* Update potential sum for this i atom from the interaction with this j atom. */
601             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
602             velecsum         = _fjsp_add_v2r8(velecsum,velec);
603
604             fscal            = felec;
605
606             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
607
608             /* Update vectorial force */
609             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
610             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
611             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
612             
613             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
614             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
615             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
616
617             }
618
619             /**************************
620              * CALCULATE INTERACTIONS *
621              **************************/
622
623             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
624             {
625
626             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
627
628             /* EWALD ELECTROSTATICS */
629
630             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
631             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
632             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
633             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
634             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
635
636             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
637             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
638             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
639             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
640             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
641             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
642             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
643             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
644             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
645             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
646
647             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
648
649             /* Update potential sum for this i atom from the interaction with this j atom. */
650             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
651             velecsum         = _fjsp_add_v2r8(velecsum,velec);
652
653             fscal            = felec;
654
655             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
656
657             /* Update vectorial force */
658             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
659             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
660             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
661             
662             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
663             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
664             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
665
666             }
667
668             /**************************
669              * CALCULATE INTERACTIONS *
670              **************************/
671
672             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
673             {
674
675             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
676
677             /* EWALD ELECTROSTATICS */
678
679             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
680             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
681             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
682             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
683             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
684
685             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
686             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
687             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
688             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
689             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
690             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
691             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
692             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
693             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
694             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
695
696             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
697
698             /* Update potential sum for this i atom from the interaction with this j atom. */
699             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
700             velecsum         = _fjsp_add_v2r8(velecsum,velec);
701
702             fscal            = felec;
703
704             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
705
706             /* Update vectorial force */
707             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
708             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
709             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
710             
711             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
712             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
713             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
714
715             }
716
717             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
718
719             /* Inner loop uses 441 flops */
720         }
721
722         if(jidx<j_index_end)
723         {
724
725             jnrA             = jjnr[jidx];
726             j_coord_offsetA  = DIM*jnrA;
727
728             /* load j atom coordinates */
729             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
730                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
731
732             /* Calculate displacement vector */
733             dx00             = _fjsp_sub_v2r8(ix0,jx0);
734             dy00             = _fjsp_sub_v2r8(iy0,jy0);
735             dz00             = _fjsp_sub_v2r8(iz0,jz0);
736             dx01             = _fjsp_sub_v2r8(ix0,jx1);
737             dy01             = _fjsp_sub_v2r8(iy0,jy1);
738             dz01             = _fjsp_sub_v2r8(iz0,jz1);
739             dx02             = _fjsp_sub_v2r8(ix0,jx2);
740             dy02             = _fjsp_sub_v2r8(iy0,jy2);
741             dz02             = _fjsp_sub_v2r8(iz0,jz2);
742             dx10             = _fjsp_sub_v2r8(ix1,jx0);
743             dy10             = _fjsp_sub_v2r8(iy1,jy0);
744             dz10             = _fjsp_sub_v2r8(iz1,jz0);
745             dx11             = _fjsp_sub_v2r8(ix1,jx1);
746             dy11             = _fjsp_sub_v2r8(iy1,jy1);
747             dz11             = _fjsp_sub_v2r8(iz1,jz1);
748             dx12             = _fjsp_sub_v2r8(ix1,jx2);
749             dy12             = _fjsp_sub_v2r8(iy1,jy2);
750             dz12             = _fjsp_sub_v2r8(iz1,jz2);
751             dx20             = _fjsp_sub_v2r8(ix2,jx0);
752             dy20             = _fjsp_sub_v2r8(iy2,jy0);
753             dz20             = _fjsp_sub_v2r8(iz2,jz0);
754             dx21             = _fjsp_sub_v2r8(ix2,jx1);
755             dy21             = _fjsp_sub_v2r8(iy2,jy1);
756             dz21             = _fjsp_sub_v2r8(iz2,jz1);
757             dx22             = _fjsp_sub_v2r8(ix2,jx2);
758             dy22             = _fjsp_sub_v2r8(iy2,jy2);
759             dz22             = _fjsp_sub_v2r8(iz2,jz2);
760
761             /* Calculate squared distance and things based on it */
762             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
763             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
764             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
765             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
766             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
767             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
768             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
769             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
770             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
771
772             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
773             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
774             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
775             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
776             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
777             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
778             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
779             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
780             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
781
782             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
783             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
784             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
785             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
786             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
787             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
788             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
789             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
790             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
791
792             fjx0             = _fjsp_setzero_v2r8();
793             fjy0             = _fjsp_setzero_v2r8();
794             fjz0             = _fjsp_setzero_v2r8();
795             fjx1             = _fjsp_setzero_v2r8();
796             fjy1             = _fjsp_setzero_v2r8();
797             fjz1             = _fjsp_setzero_v2r8();
798             fjx2             = _fjsp_setzero_v2r8();
799             fjy2             = _fjsp_setzero_v2r8();
800             fjz2             = _fjsp_setzero_v2r8();
801
802             /**************************
803              * CALCULATE INTERACTIONS *
804              **************************/
805
806             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
807             {
808
809             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
810
811             /* EWALD ELECTROSTATICS */
812
813             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
814             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
815             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
816             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
817             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
818
819             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
820             ewtabD           = _fjsp_setzero_v2r8();
821             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
822             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
823             ewtabFn          = _fjsp_setzero_v2r8();
824             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
825             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
826             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
827             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
828             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
829
830             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
831
832             /* Update potential sum for this i atom from the interaction with this j atom. */
833             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
834             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
835             velecsum         = _fjsp_add_v2r8(velecsum,velec);
836
837             fscal            = felec;
838
839             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
840
841             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
842
843             /* Update vectorial force */
844             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
845             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
846             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
847             
848             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
849             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
850             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
851
852             }
853
854             /**************************
855              * CALCULATE INTERACTIONS *
856              **************************/
857
858             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
859             {
860
861             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
862
863             /* EWALD ELECTROSTATICS */
864
865             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
866             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
867             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
868             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
869             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
870
871             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
872             ewtabD           = _fjsp_setzero_v2r8();
873             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
874             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
875             ewtabFn          = _fjsp_setzero_v2r8();
876             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
877             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
878             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
879             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
880             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
881
882             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
883
884             /* Update potential sum for this i atom from the interaction with this j atom. */
885             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
886             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
887             velecsum         = _fjsp_add_v2r8(velecsum,velec);
888
889             fscal            = felec;
890
891             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
892
893             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
894
895             /* Update vectorial force */
896             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
897             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
898             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
899             
900             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
901             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
902             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
903
904             }
905
906             /**************************
907              * CALCULATE INTERACTIONS *
908              **************************/
909
910             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
911             {
912
913             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
914
915             /* EWALD ELECTROSTATICS */
916
917             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
918             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
919             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
920             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
921             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
922
923             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
924             ewtabD           = _fjsp_setzero_v2r8();
925             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
926             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
927             ewtabFn          = _fjsp_setzero_v2r8();
928             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
929             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
930             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
931             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
932             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
933
934             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
935
936             /* Update potential sum for this i atom from the interaction with this j atom. */
937             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
938             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
939             velecsum         = _fjsp_add_v2r8(velecsum,velec);
940
941             fscal            = felec;
942
943             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
944
945             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
946
947             /* Update vectorial force */
948             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
949             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
950             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
951             
952             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
953             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
954             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
955
956             }
957
958             /**************************
959              * CALCULATE INTERACTIONS *
960              **************************/
961
962             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
963             {
964
965             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
966
967             /* EWALD ELECTROSTATICS */
968
969             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
970             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
971             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
972             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
973             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
974
975             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
976             ewtabD           = _fjsp_setzero_v2r8();
977             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
978             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
979             ewtabFn          = _fjsp_setzero_v2r8();
980             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
981             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
982             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
983             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
984             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
985
986             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
987
988             /* Update potential sum for this i atom from the interaction with this j atom. */
989             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
990             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
991             velecsum         = _fjsp_add_v2r8(velecsum,velec);
992
993             fscal            = felec;
994
995             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
996
997             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
998
999             /* Update vectorial force */
1000             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1001             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1002             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1003             
1004             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1005             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1006             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1007
1008             }
1009
1010             /**************************
1011              * CALCULATE INTERACTIONS *
1012              **************************/
1013
1014             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1015             {
1016
1017             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1018
1019             /* EWALD ELECTROSTATICS */
1020
1021             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1022             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1023             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1024             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1025             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1026
1027             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1028             ewtabD           = _fjsp_setzero_v2r8();
1029             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1030             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1031             ewtabFn          = _fjsp_setzero_v2r8();
1032             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1033             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1034             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1035             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
1036             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1037
1038             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1039
1040             /* Update potential sum for this i atom from the interaction with this j atom. */
1041             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1042             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1043             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1044
1045             fscal            = felec;
1046
1047             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1048
1049             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1050
1051             /* Update vectorial force */
1052             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1053             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1054             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1055             
1056             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1057             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1058             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1059
1060             }
1061
1062             /**************************
1063              * CALCULATE INTERACTIONS *
1064              **************************/
1065
1066             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1067             {
1068
1069             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1070
1071             /* EWALD ELECTROSTATICS */
1072
1073             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1074             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1075             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1076             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1077             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1078
1079             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1080             ewtabD           = _fjsp_setzero_v2r8();
1081             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1082             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1083             ewtabFn          = _fjsp_setzero_v2r8();
1084             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1085             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1086             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1087             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
1088             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1089
1090             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1091
1092             /* Update potential sum for this i atom from the interaction with this j atom. */
1093             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1094             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1095             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1096
1097             fscal            = felec;
1098
1099             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1100
1101             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1102
1103             /* Update vectorial force */
1104             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1105             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1106             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1107             
1108             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1109             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1110             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1111
1112             }
1113
1114             /**************************
1115              * CALCULATE INTERACTIONS *
1116              **************************/
1117
1118             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1119             {
1120
1121             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1122
1123             /* EWALD ELECTROSTATICS */
1124
1125             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1126             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
1127             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1128             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1129             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1130
1131             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1132             ewtabD           = _fjsp_setzero_v2r8();
1133             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1134             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1135             ewtabFn          = _fjsp_setzero_v2r8();
1136             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1137             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1138             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1139             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
1140             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1141
1142             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1143
1144             /* Update potential sum for this i atom from the interaction with this j atom. */
1145             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1146             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1147             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1148
1149             fscal            = felec;
1150
1151             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1152
1153             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1154
1155             /* Update vectorial force */
1156             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1157             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1158             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1159             
1160             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1161             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1162             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1163
1164             }
1165
1166             /**************************
1167              * CALCULATE INTERACTIONS *
1168              **************************/
1169
1170             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1171             {
1172
1173             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1174
1175             /* EWALD ELECTROSTATICS */
1176
1177             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1178             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1179             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1180             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1181             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1182
1183             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1184             ewtabD           = _fjsp_setzero_v2r8();
1185             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1186             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1187             ewtabFn          = _fjsp_setzero_v2r8();
1188             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1189             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1190             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1191             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
1192             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1193
1194             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1195
1196             /* Update potential sum for this i atom from the interaction with this j atom. */
1197             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1198             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1199             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1200
1201             fscal            = felec;
1202
1203             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1204
1205             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1206
1207             /* Update vectorial force */
1208             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1209             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1210             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1211             
1212             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1213             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1214             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1215
1216             }
1217
1218             /**************************
1219              * CALCULATE INTERACTIONS *
1220              **************************/
1221
1222             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1223             {
1224
1225             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1226
1227             /* EWALD ELECTROSTATICS */
1228
1229             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1230             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1231             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1232             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1233             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1234
1235             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1236             ewtabD           = _fjsp_setzero_v2r8();
1237             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1238             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1239             ewtabFn          = _fjsp_setzero_v2r8();
1240             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1241             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1242             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1243             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
1244             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1245
1246             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1247
1248             /* Update potential sum for this i atom from the interaction with this j atom. */
1249             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1250             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1251             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1252
1253             fscal            = felec;
1254
1255             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1256
1257             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1258
1259             /* Update vectorial force */
1260             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1261             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1262             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1263             
1264             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1265             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1266             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1267
1268             }
1269
1270             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1271
1272             /* Inner loop uses 441 flops */
1273         }
1274
1275         /* End of innermost loop */
1276
1277         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1278                                               f+i_coord_offset,fshift+i_shift_offset);
1279
1280         ggid                        = gid[iidx];
1281         /* Update potential energies */
1282         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1283
1284         /* Increment number of inner iterations */
1285         inneriter                  += j_index_end - j_index_start;
1286
1287         /* Outer loop uses 19 flops */
1288     }
1289
1290     /* Increment number of outer iterations */
1291     outeriter        += nri;
1292
1293     /* Update outer/inner flops */
1294
1295     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*441);
1296 }
1297 /*
1298  * GROMACS nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
1299  * Electrostatics interaction: Ewald
1300  * VdW interaction:            None
1301  * Geometry:                   Water3-Water3
1302  * Calculate force/pot:        Force
1303  */
1304 void
1305 nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
1306                     (t_nblist                    * gmx_restrict       nlist,
1307                      rvec                        * gmx_restrict          xx,
1308                      rvec                        * gmx_restrict          ff,
1309                      t_forcerec                  * gmx_restrict          fr,
1310                      t_mdatoms                   * gmx_restrict     mdatoms,
1311                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1312                      t_nrnb                      * gmx_restrict        nrnb)
1313 {
1314     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1315      * just 0 for non-waters.
1316      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1317      * jnr indices corresponding to data put in the two positions in the SIMD register.
1318      */
1319     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1320     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1321     int              jnrA,jnrB;
1322     int              j_coord_offsetA,j_coord_offsetB;
1323     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1324     real             rcutoff_scalar;
1325     real             *shiftvec,*fshift,*x,*f;
1326     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1327     int              vdwioffset0;
1328     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1329     int              vdwioffset1;
1330     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1331     int              vdwioffset2;
1332     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1333     int              vdwjidx0A,vdwjidx0B;
1334     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1335     int              vdwjidx1A,vdwjidx1B;
1336     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1337     int              vdwjidx2A,vdwjidx2B;
1338     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1339     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1340     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1341     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1342     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1343     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1344     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1345     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1346     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1347     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1348     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1349     real             *charge;
1350     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1351     real             *ewtab;
1352     _fjsp_v2r8       itab_tmp;
1353     _fjsp_v2r8       dummy_mask,cutoff_mask;
1354     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1355     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1356     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1357
1358     x                = xx[0];
1359     f                = ff[0];
1360
1361     nri              = nlist->nri;
1362     iinr             = nlist->iinr;
1363     jindex           = nlist->jindex;
1364     jjnr             = nlist->jjnr;
1365     shiftidx         = nlist->shift;
1366     gid              = nlist->gid;
1367     shiftvec         = fr->shift_vec[0];
1368     fshift           = fr->fshift[0];
1369     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1370     charge           = mdatoms->chargeA;
1371
1372     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1373     ewtab            = fr->ic->tabq_coul_F;
1374     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1375     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1376
1377     /* Setup water-specific parameters */
1378     inr              = nlist->iinr[0];
1379     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1380     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1381     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1382
1383     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1384     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1385     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1386     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1387     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1388     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1389     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1390     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1391     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1392     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1393     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1394     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1395
1396     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1397     rcutoff_scalar   = fr->rcoulomb;
1398     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1399     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
1400
1401     /* Avoid stupid compiler warnings */
1402     jnrA = jnrB = 0;
1403     j_coord_offsetA = 0;
1404     j_coord_offsetB = 0;
1405
1406     outeriter        = 0;
1407     inneriter        = 0;
1408
1409     /* Start outer loop over neighborlists */
1410     for(iidx=0; iidx<nri; iidx++)
1411     {
1412         /* Load shift vector for this list */
1413         i_shift_offset   = DIM*shiftidx[iidx];
1414
1415         /* Load limits for loop over neighbors */
1416         j_index_start    = jindex[iidx];
1417         j_index_end      = jindex[iidx+1];
1418
1419         /* Get outer coordinate index */
1420         inr              = iinr[iidx];
1421         i_coord_offset   = DIM*inr;
1422
1423         /* Load i particle coords and add shift vector */
1424         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1425                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1426
1427         fix0             = _fjsp_setzero_v2r8();
1428         fiy0             = _fjsp_setzero_v2r8();
1429         fiz0             = _fjsp_setzero_v2r8();
1430         fix1             = _fjsp_setzero_v2r8();
1431         fiy1             = _fjsp_setzero_v2r8();
1432         fiz1             = _fjsp_setzero_v2r8();
1433         fix2             = _fjsp_setzero_v2r8();
1434         fiy2             = _fjsp_setzero_v2r8();
1435         fiz2             = _fjsp_setzero_v2r8();
1436
1437         /* Start inner kernel loop */
1438         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1439         {
1440
1441             /* Get j neighbor index, and coordinate index */
1442             jnrA             = jjnr[jidx];
1443             jnrB             = jjnr[jidx+1];
1444             j_coord_offsetA  = DIM*jnrA;
1445             j_coord_offsetB  = DIM*jnrB;
1446
1447             /* load j atom coordinates */
1448             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1449                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1450
1451             /* Calculate displacement vector */
1452             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1453             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1454             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1455             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1456             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1457             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1458             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1459             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1460             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1461             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1462             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1463             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1464             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1465             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1466             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1467             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1468             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1469             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1470             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1471             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1472             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1473             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1474             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1475             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1476             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1477             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1478             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1479
1480             /* Calculate squared distance and things based on it */
1481             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1482             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1483             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1484             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1485             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1486             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1487             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1488             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1489             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1490
1491             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1492             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1493             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1494             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1495             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1496             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1497             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1498             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1499             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1500
1501             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1502             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1503             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1504             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1505             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1506             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1507             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1508             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1509             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1510
1511             fjx0             = _fjsp_setzero_v2r8();
1512             fjy0             = _fjsp_setzero_v2r8();
1513             fjz0             = _fjsp_setzero_v2r8();
1514             fjx1             = _fjsp_setzero_v2r8();
1515             fjy1             = _fjsp_setzero_v2r8();
1516             fjz1             = _fjsp_setzero_v2r8();
1517             fjx2             = _fjsp_setzero_v2r8();
1518             fjy2             = _fjsp_setzero_v2r8();
1519             fjz2             = _fjsp_setzero_v2r8();
1520
1521             /**************************
1522              * CALCULATE INTERACTIONS *
1523              **************************/
1524
1525             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1526             {
1527
1528             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1529
1530             /* EWALD ELECTROSTATICS */
1531
1532             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1533             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
1534             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1535             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1536             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1537
1538             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1539                                          &ewtabF,&ewtabFn);
1540             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1541             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1542
1543             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1544
1545             fscal            = felec;
1546
1547             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1548
1549             /* Update vectorial force */
1550             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1551             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1552             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1553             
1554             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1555             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1556             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1557
1558             }
1559
1560             /**************************
1561              * CALCULATE INTERACTIONS *
1562              **************************/
1563
1564             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1565             {
1566
1567             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1568
1569             /* EWALD ELECTROSTATICS */
1570
1571             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1572             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
1573             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1574             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1575             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1576
1577             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1578                                          &ewtabF,&ewtabFn);
1579             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1580             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1581
1582             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1583
1584             fscal            = felec;
1585
1586             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1587
1588             /* Update vectorial force */
1589             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1590             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1591             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1592             
1593             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1594             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1595             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1596
1597             }
1598
1599             /**************************
1600              * CALCULATE INTERACTIONS *
1601              **************************/
1602
1603             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1604             {
1605
1606             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1607
1608             /* EWALD ELECTROSTATICS */
1609
1610             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1611             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
1612             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1613             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1614             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1615
1616             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1617                                          &ewtabF,&ewtabFn);
1618             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1619             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1620
1621             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1622
1623             fscal            = felec;
1624
1625             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1626
1627             /* Update vectorial force */
1628             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1629             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1630             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1631             
1632             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1633             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1634             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1635
1636             }
1637
1638             /**************************
1639              * CALCULATE INTERACTIONS *
1640              **************************/
1641
1642             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1643             {
1644
1645             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1646
1647             /* EWALD ELECTROSTATICS */
1648
1649             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1650             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
1651             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1652             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1653             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1654
1655             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1656                                          &ewtabF,&ewtabFn);
1657             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1658             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1659
1660             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1661
1662             fscal            = felec;
1663
1664             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1665
1666             /* Update vectorial force */
1667             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1668             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1669             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1670             
1671             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1672             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1673             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1674
1675             }
1676
1677             /**************************
1678              * CALCULATE INTERACTIONS *
1679              **************************/
1680
1681             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1682             {
1683
1684             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1685
1686             /* EWALD ELECTROSTATICS */
1687
1688             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1689             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1690             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1691             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1692             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1693
1694             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1695                                          &ewtabF,&ewtabFn);
1696             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1697             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1698
1699             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1700
1701             fscal            = felec;
1702
1703             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1704
1705             /* Update vectorial force */
1706             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1707             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1708             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1709             
1710             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1711             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1712             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1713
1714             }
1715
1716             /**************************
1717              * CALCULATE INTERACTIONS *
1718              **************************/
1719
1720             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1721             {
1722
1723             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1724
1725             /* EWALD ELECTROSTATICS */
1726
1727             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1728             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1729             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1730             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1731             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1732
1733             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1734                                          &ewtabF,&ewtabFn);
1735             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1736             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1737
1738             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1739
1740             fscal            = felec;
1741
1742             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1743
1744             /* Update vectorial force */
1745             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1746             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1747             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1748             
1749             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1750             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1751             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1752
1753             }
1754
1755             /**************************
1756              * CALCULATE INTERACTIONS *
1757              **************************/
1758
1759             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1760             {
1761
1762             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1763
1764             /* EWALD ELECTROSTATICS */
1765
1766             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1767             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
1768             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1769             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1770             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1771
1772             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1773                                          &ewtabF,&ewtabFn);
1774             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1775             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1776
1777             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1778
1779             fscal            = felec;
1780
1781             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1782
1783             /* Update vectorial force */
1784             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1785             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1786             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1787             
1788             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1789             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1790             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1791
1792             }
1793
1794             /**************************
1795              * CALCULATE INTERACTIONS *
1796              **************************/
1797
1798             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1799             {
1800
1801             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1802
1803             /* EWALD ELECTROSTATICS */
1804
1805             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1806             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1807             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1808             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1809             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1810
1811             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1812                                          &ewtabF,&ewtabFn);
1813             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1814             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1815
1816             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1817
1818             fscal            = felec;
1819
1820             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1821
1822             /* Update vectorial force */
1823             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1824             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1825             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1826             
1827             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1828             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1829             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1830
1831             }
1832
1833             /**************************
1834              * CALCULATE INTERACTIONS *
1835              **************************/
1836
1837             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1838             {
1839
1840             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1841
1842             /* EWALD ELECTROSTATICS */
1843
1844             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1845             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1846             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1847             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1848             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1849
1850             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1851                                          &ewtabF,&ewtabFn);
1852             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1853             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1854
1855             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1856
1857             fscal            = felec;
1858
1859             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1860
1861             /* Update vectorial force */
1862             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1863             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1864             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1865             
1866             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1867             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1868             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1869
1870             }
1871
1872             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1873
1874             /* Inner loop uses 378 flops */
1875         }
1876
1877         if(jidx<j_index_end)
1878         {
1879
1880             jnrA             = jjnr[jidx];
1881             j_coord_offsetA  = DIM*jnrA;
1882
1883             /* load j atom coordinates */
1884             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1885                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1886
1887             /* Calculate displacement vector */
1888             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1889             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1890             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1891             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1892             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1893             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1894             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1895             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1896             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1897             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1898             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1899             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1900             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1901             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1902             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1903             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1904             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1905             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1906             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1907             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1908             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1909             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1910             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1911             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1912             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1913             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1914             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1915
1916             /* Calculate squared distance and things based on it */
1917             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1918             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1919             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1920             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1921             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1922             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1923             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1924             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1925             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1926
1927             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1928             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1929             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1930             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1931             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1932             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1933             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1934             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1935             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1936
1937             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1938             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1939             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1940             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1941             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1942             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1943             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1944             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1945             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1946
1947             fjx0             = _fjsp_setzero_v2r8();
1948             fjy0             = _fjsp_setzero_v2r8();
1949             fjz0             = _fjsp_setzero_v2r8();
1950             fjx1             = _fjsp_setzero_v2r8();
1951             fjy1             = _fjsp_setzero_v2r8();
1952             fjz1             = _fjsp_setzero_v2r8();
1953             fjx2             = _fjsp_setzero_v2r8();
1954             fjy2             = _fjsp_setzero_v2r8();
1955             fjz2             = _fjsp_setzero_v2r8();
1956
1957             /**************************
1958              * CALCULATE INTERACTIONS *
1959              **************************/
1960
1961             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1962             {
1963
1964             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1965
1966             /* EWALD ELECTROSTATICS */
1967
1968             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1969             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
1970             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1971             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1972             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1973
1974             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1975             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1976             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1977
1978             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1979
1980             fscal            = felec;
1981
1982             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1983
1984             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1985
1986             /* Update vectorial force */
1987             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1988             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1989             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1990             
1991             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1992             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1993             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1994
1995             }
1996
1997             /**************************
1998              * CALCULATE INTERACTIONS *
1999              **************************/
2000
2001             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
2002             {
2003
2004             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
2005
2006             /* EWALD ELECTROSTATICS */
2007
2008             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2009             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
2010             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2011             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2012             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2013
2014             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2015             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2016             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
2017
2018             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
2019
2020             fscal            = felec;
2021
2022             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2023
2024             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2025
2026             /* Update vectorial force */
2027             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
2028             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
2029             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
2030             
2031             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
2032             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
2033             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
2034
2035             }
2036
2037             /**************************
2038              * CALCULATE INTERACTIONS *
2039              **************************/
2040
2041             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
2042             {
2043
2044             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
2045
2046             /* EWALD ELECTROSTATICS */
2047
2048             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2049             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
2050             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2051             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2052             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2053
2054             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2055             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2056             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
2057
2058             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
2059
2060             fscal            = felec;
2061
2062             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2063
2064             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2065
2066             /* Update vectorial force */
2067             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
2068             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
2069             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
2070             
2071             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
2072             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
2073             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
2074
2075             }
2076
2077             /**************************
2078              * CALCULATE INTERACTIONS *
2079              **************************/
2080
2081             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
2082             {
2083
2084             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
2085
2086             /* EWALD ELECTROSTATICS */
2087
2088             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2089             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
2090             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2091             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2092             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2093
2094             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2095             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2096             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
2097
2098             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
2099
2100             fscal            = felec;
2101
2102             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2103
2104             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2105
2106             /* Update vectorial force */
2107             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
2108             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2109             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2110             
2111             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2112             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2113             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2114
2115             }
2116
2117             /**************************
2118              * CALCULATE INTERACTIONS *
2119              **************************/
2120
2121             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
2122             {
2123
2124             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
2125
2126             /* EWALD ELECTROSTATICS */
2127
2128             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2129             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
2130             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2131             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2132             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2133
2134             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2135             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2136             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
2137
2138             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
2139
2140             fscal            = felec;
2141
2142             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2143
2144             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2145
2146             /* Update vectorial force */
2147             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
2148             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2149             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2150             
2151             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2152             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2153             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2154
2155             }
2156
2157             /**************************
2158              * CALCULATE INTERACTIONS *
2159              **************************/
2160
2161             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
2162             {
2163
2164             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
2165
2166             /* EWALD ELECTROSTATICS */
2167
2168             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2169             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
2170             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2171             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2172             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2173
2174             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2175             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2176             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
2177
2178             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
2179
2180             fscal            = felec;
2181
2182             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2183
2184             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2185
2186             /* Update vectorial force */
2187             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
2188             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2189             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2190             
2191             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2192             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2193             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2194
2195             }
2196
2197             /**************************
2198              * CALCULATE INTERACTIONS *
2199              **************************/
2200
2201             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
2202             {
2203
2204             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
2205
2206             /* EWALD ELECTROSTATICS */
2207
2208             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2209             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
2210             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2211             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2212             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2213
2214             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2215             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2216             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
2217
2218             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
2219
2220             fscal            = felec;
2221
2222             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2223
2224             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2225
2226             /* Update vectorial force */
2227             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
2228             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2229             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2230             
2231             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2232             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2233             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2234
2235             }
2236
2237             /**************************
2238              * CALCULATE INTERACTIONS *
2239              **************************/
2240
2241             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
2242             {
2243
2244             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
2245
2246             /* EWALD ELECTROSTATICS */
2247
2248             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2249             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
2250             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2251             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2252             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2253
2254             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2255             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2256             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
2257
2258             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
2259
2260             fscal            = felec;
2261
2262             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2263
2264             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2265
2266             /* Update vectorial force */
2267             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
2268             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2269             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2270             
2271             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2272             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2273             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2274
2275             }
2276
2277             /**************************
2278              * CALCULATE INTERACTIONS *
2279              **************************/
2280
2281             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
2282             {
2283
2284             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
2285
2286             /* EWALD ELECTROSTATICS */
2287
2288             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2289             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
2290             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2291             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2292             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2293
2294             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2295             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2296             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2297
2298             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
2299
2300             fscal            = felec;
2301
2302             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2303
2304             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2305
2306             /* Update vectorial force */
2307             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
2308             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2309             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2310             
2311             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2312             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2313             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2314
2315             }
2316
2317             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2318
2319             /* Inner loop uses 378 flops */
2320         }
2321
2322         /* End of innermost loop */
2323
2324         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2325                                               f+i_coord_offset,fshift+i_shift_offset);
2326
2327         /* Increment number of inner iterations */
2328         inneriter                  += j_index_end - j_index_start;
2329
2330         /* Outer loop uses 18 flops */
2331     }
2332
2333     /* Increment number of outer iterations */
2334     outeriter        += nri;
2335
2336     /* Update outer/inner flops */
2337
2338     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*378);
2339 }