e15e3fcd7eba59091c7f719d7d61267672be946d
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecEwSh_VdwNone_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: Ewald
52  * VdW interaction:            None
53  * Geometry:                   Water4-Water4
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset1;
80     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
81     int              vdwioffset2;
82     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
83     int              vdwioffset3;
84     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
85     int              vdwjidx1A,vdwjidx1B;
86     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
87     int              vdwjidx2A,vdwjidx2B;
88     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
89     int              vdwjidx3A,vdwjidx3B;
90     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
91     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
92     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
93     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
94     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
95     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
96     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
97     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
98     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
99     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
103     real             *ewtab;
104     _fjsp_v2r8       itab_tmp;
105     _fjsp_v2r8       dummy_mask,cutoff_mask;
106     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
107     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
108     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
109
110     x                = xx[0];
111     f                = ff[0];
112
113     nri              = nlist->nri;
114     iinr             = nlist->iinr;
115     jindex           = nlist->jindex;
116     jjnr             = nlist->jjnr;
117     shiftidx         = nlist->shift;
118     gid              = nlist->gid;
119     shiftvec         = fr->shift_vec[0];
120     fshift           = fr->fshift[0];
121     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
122     charge           = mdatoms->chargeA;
123
124     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
125     ewtab            = fr->ic->tabq_coul_FDV0;
126     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
127     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
128
129     /* Setup water-specific parameters */
130     inr              = nlist->iinr[0];
131     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
132     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
133     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
134
135     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
136     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
137     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
138     qq11             = _fjsp_mul_v2r8(iq1,jq1);
139     qq12             = _fjsp_mul_v2r8(iq1,jq2);
140     qq13             = _fjsp_mul_v2r8(iq1,jq3);
141     qq21             = _fjsp_mul_v2r8(iq2,jq1);
142     qq22             = _fjsp_mul_v2r8(iq2,jq2);
143     qq23             = _fjsp_mul_v2r8(iq2,jq3);
144     qq31             = _fjsp_mul_v2r8(iq3,jq1);
145     qq32             = _fjsp_mul_v2r8(iq3,jq2);
146     qq33             = _fjsp_mul_v2r8(iq3,jq3);
147
148     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
149     rcutoff_scalar   = fr->rcoulomb;
150     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
151     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
152
153     /* Avoid stupid compiler warnings */
154     jnrA = jnrB = 0;
155     j_coord_offsetA = 0;
156     j_coord_offsetB = 0;
157
158     outeriter        = 0;
159     inneriter        = 0;
160
161     /* Start outer loop over neighborlists */
162     for(iidx=0; iidx<nri; iidx++)
163     {
164         /* Load shift vector for this list */
165         i_shift_offset   = DIM*shiftidx[iidx];
166
167         /* Load limits for loop over neighbors */
168         j_index_start    = jindex[iidx];
169         j_index_end      = jindex[iidx+1];
170
171         /* Get outer coordinate index */
172         inr              = iinr[iidx];
173         i_coord_offset   = DIM*inr;
174
175         /* Load i particle coords and add shift vector */
176         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
177                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
178
179         fix1             = _fjsp_setzero_v2r8();
180         fiy1             = _fjsp_setzero_v2r8();
181         fiz1             = _fjsp_setzero_v2r8();
182         fix2             = _fjsp_setzero_v2r8();
183         fiy2             = _fjsp_setzero_v2r8();
184         fiz2             = _fjsp_setzero_v2r8();
185         fix3             = _fjsp_setzero_v2r8();
186         fiy3             = _fjsp_setzero_v2r8();
187         fiz3             = _fjsp_setzero_v2r8();
188
189         /* Reset potential sums */
190         velecsum         = _fjsp_setzero_v2r8();
191
192         /* Start inner kernel loop */
193         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
194         {
195
196             /* Get j neighbor index, and coordinate index */
197             jnrA             = jjnr[jidx];
198             jnrB             = jjnr[jidx+1];
199             j_coord_offsetA  = DIM*jnrA;
200             j_coord_offsetB  = DIM*jnrB;
201
202             /* load j atom coordinates */
203             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
204                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
205
206             /* Calculate displacement vector */
207             dx11             = _fjsp_sub_v2r8(ix1,jx1);
208             dy11             = _fjsp_sub_v2r8(iy1,jy1);
209             dz11             = _fjsp_sub_v2r8(iz1,jz1);
210             dx12             = _fjsp_sub_v2r8(ix1,jx2);
211             dy12             = _fjsp_sub_v2r8(iy1,jy2);
212             dz12             = _fjsp_sub_v2r8(iz1,jz2);
213             dx13             = _fjsp_sub_v2r8(ix1,jx3);
214             dy13             = _fjsp_sub_v2r8(iy1,jy3);
215             dz13             = _fjsp_sub_v2r8(iz1,jz3);
216             dx21             = _fjsp_sub_v2r8(ix2,jx1);
217             dy21             = _fjsp_sub_v2r8(iy2,jy1);
218             dz21             = _fjsp_sub_v2r8(iz2,jz1);
219             dx22             = _fjsp_sub_v2r8(ix2,jx2);
220             dy22             = _fjsp_sub_v2r8(iy2,jy2);
221             dz22             = _fjsp_sub_v2r8(iz2,jz2);
222             dx23             = _fjsp_sub_v2r8(ix2,jx3);
223             dy23             = _fjsp_sub_v2r8(iy2,jy3);
224             dz23             = _fjsp_sub_v2r8(iz2,jz3);
225             dx31             = _fjsp_sub_v2r8(ix3,jx1);
226             dy31             = _fjsp_sub_v2r8(iy3,jy1);
227             dz31             = _fjsp_sub_v2r8(iz3,jz1);
228             dx32             = _fjsp_sub_v2r8(ix3,jx2);
229             dy32             = _fjsp_sub_v2r8(iy3,jy2);
230             dz32             = _fjsp_sub_v2r8(iz3,jz2);
231             dx33             = _fjsp_sub_v2r8(ix3,jx3);
232             dy33             = _fjsp_sub_v2r8(iy3,jy3);
233             dz33             = _fjsp_sub_v2r8(iz3,jz3);
234
235             /* Calculate squared distance and things based on it */
236             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
237             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
238             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
239             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
240             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
241             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
242             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
243             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
244             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
245
246             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
247             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
248             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
249             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
250             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
251             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
252             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
253             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
254             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
255
256             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
257             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
258             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
259             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
260             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
261             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
262             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
263             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
264             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
265
266             fjx1             = _fjsp_setzero_v2r8();
267             fjy1             = _fjsp_setzero_v2r8();
268             fjz1             = _fjsp_setzero_v2r8();
269             fjx2             = _fjsp_setzero_v2r8();
270             fjy2             = _fjsp_setzero_v2r8();
271             fjz2             = _fjsp_setzero_v2r8();
272             fjx3             = _fjsp_setzero_v2r8();
273             fjy3             = _fjsp_setzero_v2r8();
274             fjz3             = _fjsp_setzero_v2r8();
275
276             /**************************
277              * CALCULATE INTERACTIONS *
278              **************************/
279
280             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
281             {
282
283             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
284
285             /* EWALD ELECTROSTATICS */
286
287             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
288             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
289             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
290             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
291             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
292
293             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
294             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
295             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
296             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
297             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
298             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
299             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
300             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
301             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
302             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
303
304             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
305
306             /* Update potential sum for this i atom from the interaction with this j atom. */
307             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
308             velecsum         = _fjsp_add_v2r8(velecsum,velec);
309
310             fscal            = felec;
311
312             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
313
314             /* Update vectorial force */
315             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
316             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
317             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
318             
319             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
320             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
321             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
322
323             }
324
325             /**************************
326              * CALCULATE INTERACTIONS *
327              **************************/
328
329             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
330             {
331
332             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
333
334             /* EWALD ELECTROSTATICS */
335
336             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
337             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
338             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
339             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
340             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
341
342             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
343             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
344             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
345             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
346             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
347             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
348             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
349             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
350             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
351             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
352
353             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
354
355             /* Update potential sum for this i atom from the interaction with this j atom. */
356             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
357             velecsum         = _fjsp_add_v2r8(velecsum,velec);
358
359             fscal            = felec;
360
361             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
362
363             /* Update vectorial force */
364             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
365             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
366             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
367             
368             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
369             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
370             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
371
372             }
373
374             /**************************
375              * CALCULATE INTERACTIONS *
376              **************************/
377
378             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
379             {
380
381             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
382
383             /* EWALD ELECTROSTATICS */
384
385             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
386             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
387             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
388             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
389             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
390
391             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
392             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
393             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
394             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
395             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
396             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
397             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
398             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
399             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
400             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
401
402             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
403
404             /* Update potential sum for this i atom from the interaction with this j atom. */
405             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
406             velecsum         = _fjsp_add_v2r8(velecsum,velec);
407
408             fscal            = felec;
409
410             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
411
412             /* Update vectorial force */
413             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
414             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
415             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
416             
417             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
418             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
419             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
420
421             }
422
423             /**************************
424              * CALCULATE INTERACTIONS *
425              **************************/
426
427             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
428             {
429
430             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
431
432             /* EWALD ELECTROSTATICS */
433
434             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
435             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
436             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
437             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
438             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
439
440             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
441             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
442             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
443             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
444             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
445             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
446             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
447             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
448             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
449             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
450
451             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
452
453             /* Update potential sum for this i atom from the interaction with this j atom. */
454             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
455             velecsum         = _fjsp_add_v2r8(velecsum,velec);
456
457             fscal            = felec;
458
459             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
460
461             /* Update vectorial force */
462             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
463             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
464             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
465             
466             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
467             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
468             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
469
470             }
471
472             /**************************
473              * CALCULATE INTERACTIONS *
474              **************************/
475
476             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
477             {
478
479             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
480
481             /* EWALD ELECTROSTATICS */
482
483             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
484             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
485             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
486             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
487             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
488
489             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
490             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
491             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
492             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
493             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
494             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
495             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
496             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
497             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
498             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
499
500             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
501
502             /* Update potential sum for this i atom from the interaction with this j atom. */
503             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
504             velecsum         = _fjsp_add_v2r8(velecsum,velec);
505
506             fscal            = felec;
507
508             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
509
510             /* Update vectorial force */
511             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
512             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
513             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
514             
515             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
516             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
517             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
518
519             }
520
521             /**************************
522              * CALCULATE INTERACTIONS *
523              **************************/
524
525             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
526             {
527
528             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
529
530             /* EWALD ELECTROSTATICS */
531
532             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
533             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
534             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
535             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
536             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
537
538             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
539             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
540             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
541             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
542             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
543             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
544             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
545             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
546             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
547             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
548
549             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
550
551             /* Update potential sum for this i atom from the interaction with this j atom. */
552             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
553             velecsum         = _fjsp_add_v2r8(velecsum,velec);
554
555             fscal            = felec;
556
557             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
558
559             /* Update vectorial force */
560             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
561             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
562             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
563             
564             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
565             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
566             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
567
568             }
569
570             /**************************
571              * CALCULATE INTERACTIONS *
572              **************************/
573
574             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
575             {
576
577             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
578
579             /* EWALD ELECTROSTATICS */
580
581             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
582             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
583             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
584             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
585             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
586
587             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
588             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
589             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
590             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
591             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
592             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
593             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
594             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
595             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
596             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
597
598             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
599
600             /* Update potential sum for this i atom from the interaction with this j atom. */
601             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
602             velecsum         = _fjsp_add_v2r8(velecsum,velec);
603
604             fscal            = felec;
605
606             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
607
608             /* Update vectorial force */
609             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
610             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
611             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
612             
613             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
614             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
615             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
616
617             }
618
619             /**************************
620              * CALCULATE INTERACTIONS *
621              **************************/
622
623             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
624             {
625
626             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
627
628             /* EWALD ELECTROSTATICS */
629
630             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
631             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
632             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
633             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
634             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
635
636             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
637             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
638             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
639             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
640             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
641             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
642             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
643             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
644             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
645             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
646
647             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
648
649             /* Update potential sum for this i atom from the interaction with this j atom. */
650             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
651             velecsum         = _fjsp_add_v2r8(velecsum,velec);
652
653             fscal            = felec;
654
655             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
656
657             /* Update vectorial force */
658             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
659             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
660             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
661             
662             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
663             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
664             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
665
666             }
667
668             /**************************
669              * CALCULATE INTERACTIONS *
670              **************************/
671
672             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
673             {
674
675             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
676
677             /* EWALD ELECTROSTATICS */
678
679             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
680             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
681             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
682             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
683             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
684
685             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
686             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
687             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
688             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
689             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
690             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
691             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
692             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
693             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
694             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
695
696             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
697
698             /* Update potential sum for this i atom from the interaction with this j atom. */
699             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
700             velecsum         = _fjsp_add_v2r8(velecsum,velec);
701
702             fscal            = felec;
703
704             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
705
706             /* Update vectorial force */
707             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
708             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
709             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
710             
711             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
712             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
713             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
714
715             }
716
717             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
718
719             /* Inner loop uses 441 flops */
720         }
721
722         if(jidx<j_index_end)
723         {
724
725             jnrA             = jjnr[jidx];
726             j_coord_offsetA  = DIM*jnrA;
727
728             /* load j atom coordinates */
729             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
730                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
731
732             /* Calculate displacement vector */
733             dx11             = _fjsp_sub_v2r8(ix1,jx1);
734             dy11             = _fjsp_sub_v2r8(iy1,jy1);
735             dz11             = _fjsp_sub_v2r8(iz1,jz1);
736             dx12             = _fjsp_sub_v2r8(ix1,jx2);
737             dy12             = _fjsp_sub_v2r8(iy1,jy2);
738             dz12             = _fjsp_sub_v2r8(iz1,jz2);
739             dx13             = _fjsp_sub_v2r8(ix1,jx3);
740             dy13             = _fjsp_sub_v2r8(iy1,jy3);
741             dz13             = _fjsp_sub_v2r8(iz1,jz3);
742             dx21             = _fjsp_sub_v2r8(ix2,jx1);
743             dy21             = _fjsp_sub_v2r8(iy2,jy1);
744             dz21             = _fjsp_sub_v2r8(iz2,jz1);
745             dx22             = _fjsp_sub_v2r8(ix2,jx2);
746             dy22             = _fjsp_sub_v2r8(iy2,jy2);
747             dz22             = _fjsp_sub_v2r8(iz2,jz2);
748             dx23             = _fjsp_sub_v2r8(ix2,jx3);
749             dy23             = _fjsp_sub_v2r8(iy2,jy3);
750             dz23             = _fjsp_sub_v2r8(iz2,jz3);
751             dx31             = _fjsp_sub_v2r8(ix3,jx1);
752             dy31             = _fjsp_sub_v2r8(iy3,jy1);
753             dz31             = _fjsp_sub_v2r8(iz3,jz1);
754             dx32             = _fjsp_sub_v2r8(ix3,jx2);
755             dy32             = _fjsp_sub_v2r8(iy3,jy2);
756             dz32             = _fjsp_sub_v2r8(iz3,jz2);
757             dx33             = _fjsp_sub_v2r8(ix3,jx3);
758             dy33             = _fjsp_sub_v2r8(iy3,jy3);
759             dz33             = _fjsp_sub_v2r8(iz3,jz3);
760
761             /* Calculate squared distance and things based on it */
762             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
763             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
764             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
765             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
766             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
767             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
768             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
769             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
770             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
771
772             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
773             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
774             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
775             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
776             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
777             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
778             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
779             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
780             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
781
782             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
783             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
784             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
785             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
786             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
787             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
788             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
789             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
790             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
791
792             fjx1             = _fjsp_setzero_v2r8();
793             fjy1             = _fjsp_setzero_v2r8();
794             fjz1             = _fjsp_setzero_v2r8();
795             fjx2             = _fjsp_setzero_v2r8();
796             fjy2             = _fjsp_setzero_v2r8();
797             fjz2             = _fjsp_setzero_v2r8();
798             fjx3             = _fjsp_setzero_v2r8();
799             fjy3             = _fjsp_setzero_v2r8();
800             fjz3             = _fjsp_setzero_v2r8();
801
802             /**************************
803              * CALCULATE INTERACTIONS *
804              **************************/
805
806             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
807             {
808
809             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
810
811             /* EWALD ELECTROSTATICS */
812
813             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
814             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
815             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
816             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
817             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
818
819             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
820             ewtabD           = _fjsp_setzero_v2r8();
821             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
822             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
823             ewtabFn          = _fjsp_setzero_v2r8();
824             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
825             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
826             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
827             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
828             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
829
830             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
831
832             /* Update potential sum for this i atom from the interaction with this j atom. */
833             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
834             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
835             velecsum         = _fjsp_add_v2r8(velecsum,velec);
836
837             fscal            = felec;
838
839             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
840
841             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
842
843             /* Update vectorial force */
844             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
845             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
846             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
847             
848             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
849             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
850             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
851
852             }
853
854             /**************************
855              * CALCULATE INTERACTIONS *
856              **************************/
857
858             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
859             {
860
861             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
862
863             /* EWALD ELECTROSTATICS */
864
865             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
866             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
867             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
868             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
869             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
870
871             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
872             ewtabD           = _fjsp_setzero_v2r8();
873             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
874             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
875             ewtabFn          = _fjsp_setzero_v2r8();
876             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
877             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
878             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
879             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
880             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
881
882             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
883
884             /* Update potential sum for this i atom from the interaction with this j atom. */
885             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
886             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
887             velecsum         = _fjsp_add_v2r8(velecsum,velec);
888
889             fscal            = felec;
890
891             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
892
893             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
894
895             /* Update vectorial force */
896             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
897             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
898             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
899             
900             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
901             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
902             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
903
904             }
905
906             /**************************
907              * CALCULATE INTERACTIONS *
908              **************************/
909
910             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
911             {
912
913             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
914
915             /* EWALD ELECTROSTATICS */
916
917             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
918             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
919             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
920             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
921             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
922
923             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
924             ewtabD           = _fjsp_setzero_v2r8();
925             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
926             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
927             ewtabFn          = _fjsp_setzero_v2r8();
928             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
929             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
930             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
931             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
932             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
933
934             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
935
936             /* Update potential sum for this i atom from the interaction with this j atom. */
937             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
938             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
939             velecsum         = _fjsp_add_v2r8(velecsum,velec);
940
941             fscal            = felec;
942
943             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
944
945             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
946
947             /* Update vectorial force */
948             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
949             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
950             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
951             
952             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
953             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
954             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
955
956             }
957
958             /**************************
959              * CALCULATE INTERACTIONS *
960              **************************/
961
962             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
963             {
964
965             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
966
967             /* EWALD ELECTROSTATICS */
968
969             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
970             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
971             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
972             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
973             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
974
975             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
976             ewtabD           = _fjsp_setzero_v2r8();
977             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
978             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
979             ewtabFn          = _fjsp_setzero_v2r8();
980             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
981             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
982             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
983             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
984             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
985
986             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
987
988             /* Update potential sum for this i atom from the interaction with this j atom. */
989             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
990             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
991             velecsum         = _fjsp_add_v2r8(velecsum,velec);
992
993             fscal            = felec;
994
995             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
996
997             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
998
999             /* Update vectorial force */
1000             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1001             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1002             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1003             
1004             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1005             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1006             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1007
1008             }
1009
1010             /**************************
1011              * CALCULATE INTERACTIONS *
1012              **************************/
1013
1014             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1015             {
1016
1017             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1018
1019             /* EWALD ELECTROSTATICS */
1020
1021             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1022             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1023             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1024             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1025             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1026
1027             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1028             ewtabD           = _fjsp_setzero_v2r8();
1029             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1030             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1031             ewtabFn          = _fjsp_setzero_v2r8();
1032             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1033             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1034             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1035             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
1036             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1037
1038             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1039
1040             /* Update potential sum for this i atom from the interaction with this j atom. */
1041             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1042             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1043             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1044
1045             fscal            = felec;
1046
1047             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1048
1049             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1050
1051             /* Update vectorial force */
1052             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1053             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1054             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1055             
1056             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1057             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1058             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1059
1060             }
1061
1062             /**************************
1063              * CALCULATE INTERACTIONS *
1064              **************************/
1065
1066             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1067             {
1068
1069             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1070
1071             /* EWALD ELECTROSTATICS */
1072
1073             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1074             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
1075             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1076             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1077             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1078
1079             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1080             ewtabD           = _fjsp_setzero_v2r8();
1081             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1082             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1083             ewtabFn          = _fjsp_setzero_v2r8();
1084             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1085             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1086             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1087             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
1088             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
1089
1090             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1091
1092             /* Update potential sum for this i atom from the interaction with this j atom. */
1093             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1094             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1095             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1096
1097             fscal            = felec;
1098
1099             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1100
1101             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1102
1103             /* Update vectorial force */
1104             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1105             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1106             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1107             
1108             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1109             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1110             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1111
1112             }
1113
1114             /**************************
1115              * CALCULATE INTERACTIONS *
1116              **************************/
1117
1118             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1119             {
1120
1121             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1122
1123             /* EWALD ELECTROSTATICS */
1124
1125             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1126             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
1127             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1128             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1129             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1130
1131             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1132             ewtabD           = _fjsp_setzero_v2r8();
1133             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1134             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1135             ewtabFn          = _fjsp_setzero_v2r8();
1136             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1137             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1138             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1139             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
1140             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
1141
1142             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1143
1144             /* Update potential sum for this i atom from the interaction with this j atom. */
1145             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1146             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1147             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1148
1149             fscal            = felec;
1150
1151             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1152
1153             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1154
1155             /* Update vectorial force */
1156             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1157             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1158             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1159             
1160             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1161             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1162             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1163
1164             }
1165
1166             /**************************
1167              * CALCULATE INTERACTIONS *
1168              **************************/
1169
1170             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1171             {
1172
1173             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1174
1175             /* EWALD ELECTROSTATICS */
1176
1177             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1178             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
1179             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1180             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1181             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1182
1183             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1184             ewtabD           = _fjsp_setzero_v2r8();
1185             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1186             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1187             ewtabFn          = _fjsp_setzero_v2r8();
1188             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1189             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1190             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1191             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
1192             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
1193
1194             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1195
1196             /* Update potential sum for this i atom from the interaction with this j atom. */
1197             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1198             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1199             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1200
1201             fscal            = felec;
1202
1203             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1204
1205             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1206
1207             /* Update vectorial force */
1208             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1209             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1210             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1211             
1212             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1213             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1214             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1215
1216             }
1217
1218             /**************************
1219              * CALCULATE INTERACTIONS *
1220              **************************/
1221
1222             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1223             {
1224
1225             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1226
1227             /* EWALD ELECTROSTATICS */
1228
1229             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1230             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
1231             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1232             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1233             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1234
1235             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1236             ewtabD           = _fjsp_setzero_v2r8();
1237             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1238             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1239             ewtabFn          = _fjsp_setzero_v2r8();
1240             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1241             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1242             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1243             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
1244             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
1245
1246             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1247
1248             /* Update potential sum for this i atom from the interaction with this j atom. */
1249             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1250             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1251             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1252
1253             fscal            = felec;
1254
1255             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1256
1257             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1258
1259             /* Update vectorial force */
1260             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1261             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1262             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1263             
1264             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1265             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1266             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1267
1268             }
1269
1270             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1271
1272             /* Inner loop uses 441 flops */
1273         }
1274
1275         /* End of innermost loop */
1276
1277         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1278                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
1279
1280         ggid                        = gid[iidx];
1281         /* Update potential energies */
1282         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1283
1284         /* Increment number of inner iterations */
1285         inneriter                  += j_index_end - j_index_start;
1286
1287         /* Outer loop uses 19 flops */
1288     }
1289
1290     /* Increment number of outer iterations */
1291     outeriter        += nri;
1292
1293     /* Update outer/inner flops */
1294
1295     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*441);
1296 }
1297 /*
1298  * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
1299  * Electrostatics interaction: Ewald
1300  * VdW interaction:            None
1301  * Geometry:                   Water4-Water4
1302  * Calculate force/pot:        Force
1303  */
1304 void
1305 nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
1306                     (t_nblist                    * gmx_restrict       nlist,
1307                      rvec                        * gmx_restrict          xx,
1308                      rvec                        * gmx_restrict          ff,
1309                      t_forcerec                  * gmx_restrict          fr,
1310                      t_mdatoms                   * gmx_restrict     mdatoms,
1311                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1312                      t_nrnb                      * gmx_restrict        nrnb)
1313 {
1314     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1315      * just 0 for non-waters.
1316      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1317      * jnr indices corresponding to data put in the two positions in the SIMD register.
1318      */
1319     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1320     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1321     int              jnrA,jnrB;
1322     int              j_coord_offsetA,j_coord_offsetB;
1323     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1324     real             rcutoff_scalar;
1325     real             *shiftvec,*fshift,*x,*f;
1326     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1327     int              vdwioffset1;
1328     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1329     int              vdwioffset2;
1330     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1331     int              vdwioffset3;
1332     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1333     int              vdwjidx1A,vdwjidx1B;
1334     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1335     int              vdwjidx2A,vdwjidx2B;
1336     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1337     int              vdwjidx3A,vdwjidx3B;
1338     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1339     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1340     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1341     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1342     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1343     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1344     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1345     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1346     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1347     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1348     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1349     real             *charge;
1350     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1351     real             *ewtab;
1352     _fjsp_v2r8       itab_tmp;
1353     _fjsp_v2r8       dummy_mask,cutoff_mask;
1354     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1355     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1356     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1357
1358     x                = xx[0];
1359     f                = ff[0];
1360
1361     nri              = nlist->nri;
1362     iinr             = nlist->iinr;
1363     jindex           = nlist->jindex;
1364     jjnr             = nlist->jjnr;
1365     shiftidx         = nlist->shift;
1366     gid              = nlist->gid;
1367     shiftvec         = fr->shift_vec[0];
1368     fshift           = fr->fshift[0];
1369     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1370     charge           = mdatoms->chargeA;
1371
1372     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1373     ewtab            = fr->ic->tabq_coul_F;
1374     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1375     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1376
1377     /* Setup water-specific parameters */
1378     inr              = nlist->iinr[0];
1379     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1380     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1381     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1382
1383     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1384     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1385     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
1386     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1387     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1388     qq13             = _fjsp_mul_v2r8(iq1,jq3);
1389     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1390     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1391     qq23             = _fjsp_mul_v2r8(iq2,jq3);
1392     qq31             = _fjsp_mul_v2r8(iq3,jq1);
1393     qq32             = _fjsp_mul_v2r8(iq3,jq2);
1394     qq33             = _fjsp_mul_v2r8(iq3,jq3);
1395
1396     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1397     rcutoff_scalar   = fr->rcoulomb;
1398     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1399     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
1400
1401     /* Avoid stupid compiler warnings */
1402     jnrA = jnrB = 0;
1403     j_coord_offsetA = 0;
1404     j_coord_offsetB = 0;
1405
1406     outeriter        = 0;
1407     inneriter        = 0;
1408
1409     /* Start outer loop over neighborlists */
1410     for(iidx=0; iidx<nri; iidx++)
1411     {
1412         /* Load shift vector for this list */
1413         i_shift_offset   = DIM*shiftidx[iidx];
1414
1415         /* Load limits for loop over neighbors */
1416         j_index_start    = jindex[iidx];
1417         j_index_end      = jindex[iidx+1];
1418
1419         /* Get outer coordinate index */
1420         inr              = iinr[iidx];
1421         i_coord_offset   = DIM*inr;
1422
1423         /* Load i particle coords and add shift vector */
1424         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
1425                                                  &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1426
1427         fix1             = _fjsp_setzero_v2r8();
1428         fiy1             = _fjsp_setzero_v2r8();
1429         fiz1             = _fjsp_setzero_v2r8();
1430         fix2             = _fjsp_setzero_v2r8();
1431         fiy2             = _fjsp_setzero_v2r8();
1432         fiz2             = _fjsp_setzero_v2r8();
1433         fix3             = _fjsp_setzero_v2r8();
1434         fiy3             = _fjsp_setzero_v2r8();
1435         fiz3             = _fjsp_setzero_v2r8();
1436
1437         /* Start inner kernel loop */
1438         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1439         {
1440
1441             /* Get j neighbor index, and coordinate index */
1442             jnrA             = jjnr[jidx];
1443             jnrB             = jjnr[jidx+1];
1444             j_coord_offsetA  = DIM*jnrA;
1445             j_coord_offsetB  = DIM*jnrB;
1446
1447             /* load j atom coordinates */
1448             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
1449                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1450
1451             /* Calculate displacement vector */
1452             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1453             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1454             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1455             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1456             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1457             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1458             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1459             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1460             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1461             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1462             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1463             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1464             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1465             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1466             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1467             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1468             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1469             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1470             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1471             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1472             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1473             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1474             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1475             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1476             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1477             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1478             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1479
1480             /* Calculate squared distance and things based on it */
1481             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1482             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1483             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1484             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1485             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1486             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1487             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1488             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1489             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1490
1491             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1492             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1493             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1494             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1495             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1496             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1497             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1498             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1499             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1500
1501             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1502             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1503             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1504             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1505             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1506             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1507             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1508             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1509             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1510
1511             fjx1             = _fjsp_setzero_v2r8();
1512             fjy1             = _fjsp_setzero_v2r8();
1513             fjz1             = _fjsp_setzero_v2r8();
1514             fjx2             = _fjsp_setzero_v2r8();
1515             fjy2             = _fjsp_setzero_v2r8();
1516             fjz2             = _fjsp_setzero_v2r8();
1517             fjx3             = _fjsp_setzero_v2r8();
1518             fjy3             = _fjsp_setzero_v2r8();
1519             fjz3             = _fjsp_setzero_v2r8();
1520
1521             /**************************
1522              * CALCULATE INTERACTIONS *
1523              **************************/
1524
1525             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1526             {
1527
1528             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1529
1530             /* EWALD ELECTROSTATICS */
1531
1532             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1533             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1534             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1535             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1536             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1537
1538             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1539                                          &ewtabF,&ewtabFn);
1540             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1541             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1542
1543             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1544
1545             fscal            = felec;
1546
1547             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1548
1549             /* Update vectorial force */
1550             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1551             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1552             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1553             
1554             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1555             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1556             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1557
1558             }
1559
1560             /**************************
1561              * CALCULATE INTERACTIONS *
1562              **************************/
1563
1564             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1565             {
1566
1567             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1568
1569             /* EWALD ELECTROSTATICS */
1570
1571             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1572             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1573             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1574             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1575             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1576
1577             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1578                                          &ewtabF,&ewtabFn);
1579             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1580             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1581
1582             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1583
1584             fscal            = felec;
1585
1586             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1587
1588             /* Update vectorial force */
1589             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1590             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1591             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1592             
1593             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1594             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1595             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1596
1597             }
1598
1599             /**************************
1600              * CALCULATE INTERACTIONS *
1601              **************************/
1602
1603             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1604             {
1605
1606             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
1607
1608             /* EWALD ELECTROSTATICS */
1609
1610             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1611             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
1612             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1613             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1614             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1615
1616             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1617                                          &ewtabF,&ewtabFn);
1618             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1619             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
1620
1621             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1622
1623             fscal            = felec;
1624
1625             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1626
1627             /* Update vectorial force */
1628             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1629             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1630             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1631             
1632             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1633             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1634             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1635
1636             }
1637
1638             /**************************
1639              * CALCULATE INTERACTIONS *
1640              **************************/
1641
1642             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1643             {
1644
1645             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1646
1647             /* EWALD ELECTROSTATICS */
1648
1649             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1650             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1651             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1652             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1653             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1654
1655             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1656                                          &ewtabF,&ewtabFn);
1657             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1658             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1659
1660             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1661
1662             fscal            = felec;
1663
1664             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1665
1666             /* Update vectorial force */
1667             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1668             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1669             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1670             
1671             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1672             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1673             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1674
1675             }
1676
1677             /**************************
1678              * CALCULATE INTERACTIONS *
1679              **************************/
1680
1681             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1682             {
1683
1684             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1685
1686             /* EWALD ELECTROSTATICS */
1687
1688             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1689             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1690             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1691             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1692             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1693
1694             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1695                                          &ewtabF,&ewtabFn);
1696             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1697             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1698
1699             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1700
1701             fscal            = felec;
1702
1703             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1704
1705             /* Update vectorial force */
1706             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1707             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1708             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1709             
1710             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1711             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1712             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1713
1714             }
1715
1716             /**************************
1717              * CALCULATE INTERACTIONS *
1718              **************************/
1719
1720             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1721             {
1722
1723             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
1724
1725             /* EWALD ELECTROSTATICS */
1726
1727             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1728             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
1729             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1730             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1731             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1732
1733             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1734                                          &ewtabF,&ewtabFn);
1735             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1736             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
1737
1738             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1739
1740             fscal            = felec;
1741
1742             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1743
1744             /* Update vectorial force */
1745             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1746             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1747             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1748             
1749             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1750             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1751             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1752
1753             }
1754
1755             /**************************
1756              * CALCULATE INTERACTIONS *
1757              **************************/
1758
1759             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1760             {
1761
1762             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
1763
1764             /* EWALD ELECTROSTATICS */
1765
1766             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1767             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
1768             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1769             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1770             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1771
1772             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1773                                          &ewtabF,&ewtabFn);
1774             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1775             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
1776
1777             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1778
1779             fscal            = felec;
1780
1781             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1782
1783             /* Update vectorial force */
1784             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1785             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1786             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1787             
1788             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1789             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1790             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1791
1792             }
1793
1794             /**************************
1795              * CALCULATE INTERACTIONS *
1796              **************************/
1797
1798             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1799             {
1800
1801             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
1802
1803             /* EWALD ELECTROSTATICS */
1804
1805             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1806             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
1807             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1808             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1809             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1810
1811             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1812                                          &ewtabF,&ewtabFn);
1813             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1814             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
1815
1816             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1817
1818             fscal            = felec;
1819
1820             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1821
1822             /* Update vectorial force */
1823             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1824             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1825             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1826             
1827             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1828             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1829             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1830
1831             }
1832
1833             /**************************
1834              * CALCULATE INTERACTIONS *
1835              **************************/
1836
1837             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1838             {
1839
1840             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
1841
1842             /* EWALD ELECTROSTATICS */
1843
1844             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1845             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
1846             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1847             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1848             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1849
1850             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1851                                          &ewtabF,&ewtabFn);
1852             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1853             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
1854
1855             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1856
1857             fscal            = felec;
1858
1859             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1860
1861             /* Update vectorial force */
1862             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1863             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1864             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1865             
1866             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1867             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1868             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1869
1870             }
1871
1872             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1873
1874             /* Inner loop uses 378 flops */
1875         }
1876
1877         if(jidx<j_index_end)
1878         {
1879
1880             jnrA             = jjnr[jidx];
1881             j_coord_offsetA  = DIM*jnrA;
1882
1883             /* load j atom coordinates */
1884             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
1885                                               &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
1886
1887             /* Calculate displacement vector */
1888             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1889             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1890             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1891             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1892             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1893             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1894             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1895             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1896             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1897             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1898             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1899             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1900             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1901             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1902             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1903             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1904             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1905             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1906             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1907             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1908             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1909             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1910             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1911             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1912             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1913             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1914             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1915
1916             /* Calculate squared distance and things based on it */
1917             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1918             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1919             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1920             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1921             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1922             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1923             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1924             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1925             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1926
1927             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1928             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1929             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1930             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1931             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1932             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1933             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1934             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1935             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1936
1937             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1938             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1939             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1940             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1941             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1942             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1943             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1944             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1945             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1946
1947             fjx1             = _fjsp_setzero_v2r8();
1948             fjy1             = _fjsp_setzero_v2r8();
1949             fjz1             = _fjsp_setzero_v2r8();
1950             fjx2             = _fjsp_setzero_v2r8();
1951             fjy2             = _fjsp_setzero_v2r8();
1952             fjz2             = _fjsp_setzero_v2r8();
1953             fjx3             = _fjsp_setzero_v2r8();
1954             fjy3             = _fjsp_setzero_v2r8();
1955             fjz3             = _fjsp_setzero_v2r8();
1956
1957             /**************************
1958              * CALCULATE INTERACTIONS *
1959              **************************/
1960
1961             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1962             {
1963
1964             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1965
1966             /* EWALD ELECTROSTATICS */
1967
1968             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1969             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1970             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1971             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1972             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1973
1974             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
1975             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1976             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1977
1978             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1979
1980             fscal            = felec;
1981
1982             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1983
1984             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1985
1986             /* Update vectorial force */
1987             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1988             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1989             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1990             
1991             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1992             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1993             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1994
1995             }
1996
1997             /**************************
1998              * CALCULATE INTERACTIONS *
1999              **************************/
2000
2001             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
2002             {
2003
2004             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
2005
2006             /* EWALD ELECTROSTATICS */
2007
2008             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2009             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
2010             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2011             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2012             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2013
2014             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2015             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2016             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
2017
2018             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
2019
2020             fscal            = felec;
2021
2022             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2023
2024             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2025
2026             /* Update vectorial force */
2027             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
2028             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2029             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2030             
2031             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2032             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2033             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2034
2035             }
2036
2037             /**************************
2038              * CALCULATE INTERACTIONS *
2039              **************************/
2040
2041             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
2042             {
2043
2044             r13              = _fjsp_mul_v2r8(rsq13,rinv13);
2045
2046             /* EWALD ELECTROSTATICS */
2047
2048             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2049             ewrt             = _fjsp_mul_v2r8(r13,ewtabscale);
2050             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2051             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2052             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2053
2054             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2055             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2056             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
2057
2058             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
2059
2060             fscal            = felec;
2061
2062             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2063
2064             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2065
2066             /* Update vectorial force */
2067             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
2068             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
2069             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
2070             
2071             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
2072             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
2073             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
2074
2075             }
2076
2077             /**************************
2078              * CALCULATE INTERACTIONS *
2079              **************************/
2080
2081             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
2082             {
2083
2084             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
2085
2086             /* EWALD ELECTROSTATICS */
2087
2088             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2089             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
2090             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2091             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2092             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2093
2094             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2095             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2096             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
2097
2098             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
2099
2100             fscal            = felec;
2101
2102             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2103
2104             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2105
2106             /* Update vectorial force */
2107             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
2108             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2109             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2110             
2111             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2112             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2113             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2114
2115             }
2116
2117             /**************************
2118              * CALCULATE INTERACTIONS *
2119              **************************/
2120
2121             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
2122             {
2123
2124             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
2125
2126             /* EWALD ELECTROSTATICS */
2127
2128             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2129             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
2130             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2131             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2132             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2133
2134             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2135             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2136             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2137
2138             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
2139
2140             fscal            = felec;
2141
2142             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2143
2144             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2145
2146             /* Update vectorial force */
2147             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
2148             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2149             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2150             
2151             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2152             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2153             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2154
2155             }
2156
2157             /**************************
2158              * CALCULATE INTERACTIONS *
2159              **************************/
2160
2161             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
2162             {
2163
2164             r23              = _fjsp_mul_v2r8(rsq23,rinv23);
2165
2166             /* EWALD ELECTROSTATICS */
2167
2168             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2169             ewrt             = _fjsp_mul_v2r8(r23,ewtabscale);
2170             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2171             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2172             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2173
2174             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2175             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2176             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
2177
2178             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
2179
2180             fscal            = felec;
2181
2182             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2183
2184             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2185
2186             /* Update vectorial force */
2187             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
2188             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
2189             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
2190             
2191             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
2192             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
2193             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
2194
2195             }
2196
2197             /**************************
2198              * CALCULATE INTERACTIONS *
2199              **************************/
2200
2201             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
2202             {
2203
2204             r31              = _fjsp_mul_v2r8(rsq31,rinv31);
2205
2206             /* EWALD ELECTROSTATICS */
2207
2208             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2209             ewrt             = _fjsp_mul_v2r8(r31,ewtabscale);
2210             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2211             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2212             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2213
2214             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2215             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2216             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
2217
2218             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
2219
2220             fscal            = felec;
2221
2222             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2223
2224             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2225
2226             /* Update vectorial force */
2227             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
2228             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
2229             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
2230             
2231             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
2232             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
2233             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
2234
2235             }
2236
2237             /**************************
2238              * CALCULATE INTERACTIONS *
2239              **************************/
2240
2241             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
2242             {
2243
2244             r32              = _fjsp_mul_v2r8(rsq32,rinv32);
2245
2246             /* EWALD ELECTROSTATICS */
2247
2248             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2249             ewrt             = _fjsp_mul_v2r8(r32,ewtabscale);
2250             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2251             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2252             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2253
2254             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2255             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2256             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
2257
2258             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
2259
2260             fscal            = felec;
2261
2262             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2263
2264             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2265
2266             /* Update vectorial force */
2267             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
2268             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
2269             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
2270             
2271             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
2272             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
2273             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
2274
2275             }
2276
2277             /**************************
2278              * CALCULATE INTERACTIONS *
2279              **************************/
2280
2281             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
2282             {
2283
2284             r33              = _fjsp_mul_v2r8(rsq33,rinv33);
2285
2286             /* EWALD ELECTROSTATICS */
2287
2288             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2289             ewrt             = _fjsp_mul_v2r8(r33,ewtabscale);
2290             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2291             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2292             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2293
2294             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2295             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2296             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
2297
2298             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
2299
2300             fscal            = felec;
2301
2302             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2303
2304             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2305
2306             /* Update vectorial force */
2307             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
2308             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
2309             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
2310             
2311             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
2312             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
2313             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
2314
2315             }
2316
2317             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2318
2319             /* Inner loop uses 378 flops */
2320         }
2321
2322         /* End of innermost loop */
2323
2324         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2325                                               f+i_coord_offset+DIM,fshift+i_shift_offset);
2326
2327         /* Increment number of inner iterations */
2328         inneriter                  += j_index_end - j_index_start;
2329
2330         /* Outer loop uses 18 flops */
2331     }
2332
2333     /* Increment number of outer iterations */
2334     outeriter        += nri;
2335
2336     /* Update outer/inner flops */
2337
2338     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*378);
2339 }