dac022e976b0148c50e2388f0c69ca04c6e3c5b3
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: Ewald
54  * VdW interaction:            LennardJones
55  * Geometry:                   Water3-Water3
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwjidx0A,vdwjidx0B;
88     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89     int              vdwjidx1A,vdwjidx1B;
90     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91     int              vdwjidx2A,vdwjidx2B;
92     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
103     real             *charge;
104     int              nvdwtype;
105     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
106     int              *vdwtype;
107     real             *vdwparam;
108     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
109     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
110     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
111     real             *ewtab;
112     _fjsp_v2r8       itab_tmp;
113     _fjsp_v2r8       dummy_mask,cutoff_mask;
114     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
115     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
116     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
117
118     x                = xx[0];
119     f                = ff[0];
120
121     nri              = nlist->nri;
122     iinr             = nlist->iinr;
123     jindex           = nlist->jindex;
124     jjnr             = nlist->jjnr;
125     shiftidx         = nlist->shift;
126     gid              = nlist->gid;
127     shiftvec         = fr->shift_vec[0];
128     fshift           = fr->fshift[0];
129     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
130     charge           = mdatoms->chargeA;
131     nvdwtype         = fr->ntype;
132     vdwparam         = fr->nbfp;
133     vdwtype          = mdatoms->typeA;
134
135     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
136     ewtab            = fr->ic->tabq_coul_FDV0;
137     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
138     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
139
140     /* Setup water-specific parameters */
141     inr              = nlist->iinr[0];
142     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
143     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
144     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
145     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
146
147     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
148     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
149     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
150     vdwjidx0A        = 2*vdwtype[inr+0];
151     qq00             = _fjsp_mul_v2r8(iq0,jq0);
152     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
153     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
154     qq01             = _fjsp_mul_v2r8(iq0,jq1);
155     qq02             = _fjsp_mul_v2r8(iq0,jq2);
156     qq10             = _fjsp_mul_v2r8(iq1,jq0);
157     qq11             = _fjsp_mul_v2r8(iq1,jq1);
158     qq12             = _fjsp_mul_v2r8(iq1,jq2);
159     qq20             = _fjsp_mul_v2r8(iq2,jq0);
160     qq21             = _fjsp_mul_v2r8(iq2,jq1);
161     qq22             = _fjsp_mul_v2r8(iq2,jq2);
162
163     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
164     rcutoff_scalar   = fr->rcoulomb;
165     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
166     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
167
168     sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
169     rvdw             = gmx_fjsp_set1_v2r8(fr->rvdw);
170
171     /* Avoid stupid compiler warnings */
172     jnrA = jnrB = 0;
173     j_coord_offsetA = 0;
174     j_coord_offsetB = 0;
175
176     outeriter        = 0;
177     inneriter        = 0;
178
179     /* Start outer loop over neighborlists */
180     for(iidx=0; iidx<nri; iidx++)
181     {
182         /* Load shift vector for this list */
183         i_shift_offset   = DIM*shiftidx[iidx];
184
185         /* Load limits for loop over neighbors */
186         j_index_start    = jindex[iidx];
187         j_index_end      = jindex[iidx+1];
188
189         /* Get outer coordinate index */
190         inr              = iinr[iidx];
191         i_coord_offset   = DIM*inr;
192
193         /* Load i particle coords and add shift vector */
194         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
195                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
196
197         fix0             = _fjsp_setzero_v2r8();
198         fiy0             = _fjsp_setzero_v2r8();
199         fiz0             = _fjsp_setzero_v2r8();
200         fix1             = _fjsp_setzero_v2r8();
201         fiy1             = _fjsp_setzero_v2r8();
202         fiz1             = _fjsp_setzero_v2r8();
203         fix2             = _fjsp_setzero_v2r8();
204         fiy2             = _fjsp_setzero_v2r8();
205         fiz2             = _fjsp_setzero_v2r8();
206
207         /* Reset potential sums */
208         velecsum         = _fjsp_setzero_v2r8();
209         vvdwsum          = _fjsp_setzero_v2r8();
210
211         /* Start inner kernel loop */
212         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
213         {
214
215             /* Get j neighbor index, and coordinate index */
216             jnrA             = jjnr[jidx];
217             jnrB             = jjnr[jidx+1];
218             j_coord_offsetA  = DIM*jnrA;
219             j_coord_offsetB  = DIM*jnrB;
220
221             /* load j atom coordinates */
222             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
223                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
224
225             /* Calculate displacement vector */
226             dx00             = _fjsp_sub_v2r8(ix0,jx0);
227             dy00             = _fjsp_sub_v2r8(iy0,jy0);
228             dz00             = _fjsp_sub_v2r8(iz0,jz0);
229             dx01             = _fjsp_sub_v2r8(ix0,jx1);
230             dy01             = _fjsp_sub_v2r8(iy0,jy1);
231             dz01             = _fjsp_sub_v2r8(iz0,jz1);
232             dx02             = _fjsp_sub_v2r8(ix0,jx2);
233             dy02             = _fjsp_sub_v2r8(iy0,jy2);
234             dz02             = _fjsp_sub_v2r8(iz0,jz2);
235             dx10             = _fjsp_sub_v2r8(ix1,jx0);
236             dy10             = _fjsp_sub_v2r8(iy1,jy0);
237             dz10             = _fjsp_sub_v2r8(iz1,jz0);
238             dx11             = _fjsp_sub_v2r8(ix1,jx1);
239             dy11             = _fjsp_sub_v2r8(iy1,jy1);
240             dz11             = _fjsp_sub_v2r8(iz1,jz1);
241             dx12             = _fjsp_sub_v2r8(ix1,jx2);
242             dy12             = _fjsp_sub_v2r8(iy1,jy2);
243             dz12             = _fjsp_sub_v2r8(iz1,jz2);
244             dx20             = _fjsp_sub_v2r8(ix2,jx0);
245             dy20             = _fjsp_sub_v2r8(iy2,jy0);
246             dz20             = _fjsp_sub_v2r8(iz2,jz0);
247             dx21             = _fjsp_sub_v2r8(ix2,jx1);
248             dy21             = _fjsp_sub_v2r8(iy2,jy1);
249             dz21             = _fjsp_sub_v2r8(iz2,jz1);
250             dx22             = _fjsp_sub_v2r8(ix2,jx2);
251             dy22             = _fjsp_sub_v2r8(iy2,jy2);
252             dz22             = _fjsp_sub_v2r8(iz2,jz2);
253
254             /* Calculate squared distance and things based on it */
255             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
256             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
257             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
258             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
259             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
260             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
261             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
262             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
263             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
264
265             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
266             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
267             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
268             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
269             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
270             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
271             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
272             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
273             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
274
275             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
276             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
277             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
278             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
279             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
280             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
281             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
282             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
283             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
284
285             fjx0             = _fjsp_setzero_v2r8();
286             fjy0             = _fjsp_setzero_v2r8();
287             fjz0             = _fjsp_setzero_v2r8();
288             fjx1             = _fjsp_setzero_v2r8();
289             fjy1             = _fjsp_setzero_v2r8();
290             fjz1             = _fjsp_setzero_v2r8();
291             fjx2             = _fjsp_setzero_v2r8();
292             fjy2             = _fjsp_setzero_v2r8();
293             fjz2             = _fjsp_setzero_v2r8();
294
295             /**************************
296              * CALCULATE INTERACTIONS *
297              **************************/
298
299             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
300             {
301
302             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
303
304             /* EWALD ELECTROSTATICS */
305
306             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
307             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
308             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
309             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
310             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
311
312             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
313             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
314             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
315             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
316             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
317             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
318             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
319             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
320             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
321             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
322
323             /* LENNARD-JONES DISPERSION/REPULSION */
324
325             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
326             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
327             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
328             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
329                                            _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
330             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
331
332             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
333
334             /* Update potential sum for this i atom from the interaction with this j atom. */
335             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
336             velecsum         = _fjsp_add_v2r8(velecsum,velec);
337             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
338             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
339
340             fscal            = _fjsp_add_v2r8(felec,fvdw);
341
342             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
343
344             /* Update vectorial force */
345             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
346             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
347             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
348             
349             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
350             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
351             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
352
353             }
354
355             /**************************
356              * CALCULATE INTERACTIONS *
357              **************************/
358
359             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
360             {
361
362             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
363
364             /* EWALD ELECTROSTATICS */
365
366             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
367             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
368             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
369             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
370             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
371
372             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
373             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
374             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
375             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
376             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
377             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
378             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
379             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
380             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
381             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
382
383             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
384
385             /* Update potential sum for this i atom from the interaction with this j atom. */
386             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
387             velecsum         = _fjsp_add_v2r8(velecsum,velec);
388
389             fscal            = felec;
390
391             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
392
393             /* Update vectorial force */
394             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
395             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
396             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
397             
398             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
399             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
400             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
401
402             }
403
404             /**************************
405              * CALCULATE INTERACTIONS *
406              **************************/
407
408             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
409             {
410
411             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
412
413             /* EWALD ELECTROSTATICS */
414
415             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
416             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
417             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
418             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
419             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
420
421             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
422             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
423             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
424             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
425             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
426             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
427             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
428             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
429             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
430             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
431
432             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
433
434             /* Update potential sum for this i atom from the interaction with this j atom. */
435             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
436             velecsum         = _fjsp_add_v2r8(velecsum,velec);
437
438             fscal            = felec;
439
440             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
441
442             /* Update vectorial force */
443             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
444             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
445             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
446             
447             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
448             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
449             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
450
451             }
452
453             /**************************
454              * CALCULATE INTERACTIONS *
455              **************************/
456
457             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
458             {
459
460             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
461
462             /* EWALD ELECTROSTATICS */
463
464             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
465             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
466             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
467             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
468             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
469
470             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
471             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
472             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
473             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
474             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
475             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
476             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
477             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
478             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
479             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
480
481             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
482
483             /* Update potential sum for this i atom from the interaction with this j atom. */
484             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
485             velecsum         = _fjsp_add_v2r8(velecsum,velec);
486
487             fscal            = felec;
488
489             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
490
491             /* Update vectorial force */
492             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
493             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
494             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
495             
496             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
497             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
498             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
499
500             }
501
502             /**************************
503              * CALCULATE INTERACTIONS *
504              **************************/
505
506             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
507             {
508
509             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
510
511             /* EWALD ELECTROSTATICS */
512
513             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
514             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
515             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
516             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
517             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
518
519             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
520             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
521             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
522             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
523             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
524             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
525             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
526             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
527             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
528             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
529
530             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
531
532             /* Update potential sum for this i atom from the interaction with this j atom. */
533             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
534             velecsum         = _fjsp_add_v2r8(velecsum,velec);
535
536             fscal            = felec;
537
538             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
539
540             /* Update vectorial force */
541             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
542             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
543             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
544             
545             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
546             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
547             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
548
549             }
550
551             /**************************
552              * CALCULATE INTERACTIONS *
553              **************************/
554
555             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
556             {
557
558             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
559
560             /* EWALD ELECTROSTATICS */
561
562             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
563             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
564             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
565             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
566             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
567
568             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
569             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
570             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
571             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
572             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
573             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
574             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
575             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
576             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
577             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
578
579             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
580
581             /* Update potential sum for this i atom from the interaction with this j atom. */
582             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
583             velecsum         = _fjsp_add_v2r8(velecsum,velec);
584
585             fscal            = felec;
586
587             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
588
589             /* Update vectorial force */
590             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
591             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
592             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
593             
594             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
595             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
596             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
597
598             }
599
600             /**************************
601              * CALCULATE INTERACTIONS *
602              **************************/
603
604             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
605             {
606
607             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
608
609             /* EWALD ELECTROSTATICS */
610
611             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
612             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
613             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
614             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
615             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
616
617             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
618             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
619             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
620             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
621             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
622             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
623             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
624             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
625             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
626             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
627
628             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
629
630             /* Update potential sum for this i atom from the interaction with this j atom. */
631             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
632             velecsum         = _fjsp_add_v2r8(velecsum,velec);
633
634             fscal            = felec;
635
636             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
637
638             /* Update vectorial force */
639             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
640             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
641             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
642             
643             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
644             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
645             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
646
647             }
648
649             /**************************
650              * CALCULATE INTERACTIONS *
651              **************************/
652
653             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
654             {
655
656             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
657
658             /* EWALD ELECTROSTATICS */
659
660             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
661             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
662             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
663             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
664             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
665
666             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
667             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
668             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
669             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
670             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
671             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
672             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
673             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
674             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
675             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
676
677             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
678
679             /* Update potential sum for this i atom from the interaction with this j atom. */
680             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
681             velecsum         = _fjsp_add_v2r8(velecsum,velec);
682
683             fscal            = felec;
684
685             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
686
687             /* Update vectorial force */
688             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
689             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
690             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
691             
692             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
693             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
694             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
695
696             }
697
698             /**************************
699              * CALCULATE INTERACTIONS *
700              **************************/
701
702             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
703             {
704
705             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
706
707             /* EWALD ELECTROSTATICS */
708
709             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
710             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
711             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
712             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
713             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
714
715             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
716             ewtabD           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
717             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
718             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
719             ewtabFn          = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
720             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
721             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
722             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
723             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
724             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
725
726             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
727
728             /* Update potential sum for this i atom from the interaction with this j atom. */
729             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
730             velecsum         = _fjsp_add_v2r8(velecsum,velec);
731
732             fscal            = felec;
733
734             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
735
736             /* Update vectorial force */
737             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
738             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
739             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
740             
741             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
742             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
743             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
744
745             }
746
747             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
748
749             /* Inner loop uses 459 flops */
750         }
751
752         if(jidx<j_index_end)
753         {
754
755             jnrA             = jjnr[jidx];
756             j_coord_offsetA  = DIM*jnrA;
757
758             /* load j atom coordinates */
759             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
760                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
761
762             /* Calculate displacement vector */
763             dx00             = _fjsp_sub_v2r8(ix0,jx0);
764             dy00             = _fjsp_sub_v2r8(iy0,jy0);
765             dz00             = _fjsp_sub_v2r8(iz0,jz0);
766             dx01             = _fjsp_sub_v2r8(ix0,jx1);
767             dy01             = _fjsp_sub_v2r8(iy0,jy1);
768             dz01             = _fjsp_sub_v2r8(iz0,jz1);
769             dx02             = _fjsp_sub_v2r8(ix0,jx2);
770             dy02             = _fjsp_sub_v2r8(iy0,jy2);
771             dz02             = _fjsp_sub_v2r8(iz0,jz2);
772             dx10             = _fjsp_sub_v2r8(ix1,jx0);
773             dy10             = _fjsp_sub_v2r8(iy1,jy0);
774             dz10             = _fjsp_sub_v2r8(iz1,jz0);
775             dx11             = _fjsp_sub_v2r8(ix1,jx1);
776             dy11             = _fjsp_sub_v2r8(iy1,jy1);
777             dz11             = _fjsp_sub_v2r8(iz1,jz1);
778             dx12             = _fjsp_sub_v2r8(ix1,jx2);
779             dy12             = _fjsp_sub_v2r8(iy1,jy2);
780             dz12             = _fjsp_sub_v2r8(iz1,jz2);
781             dx20             = _fjsp_sub_v2r8(ix2,jx0);
782             dy20             = _fjsp_sub_v2r8(iy2,jy0);
783             dz20             = _fjsp_sub_v2r8(iz2,jz0);
784             dx21             = _fjsp_sub_v2r8(ix2,jx1);
785             dy21             = _fjsp_sub_v2r8(iy2,jy1);
786             dz21             = _fjsp_sub_v2r8(iz2,jz1);
787             dx22             = _fjsp_sub_v2r8(ix2,jx2);
788             dy22             = _fjsp_sub_v2r8(iy2,jy2);
789             dz22             = _fjsp_sub_v2r8(iz2,jz2);
790
791             /* Calculate squared distance and things based on it */
792             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
793             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
794             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
795             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
796             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
797             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
798             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
799             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
800             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
801
802             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
803             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
804             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
805             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
806             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
807             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
808             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
809             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
810             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
811
812             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
813             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
814             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
815             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
816             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
817             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
818             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
819             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
820             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
821
822             fjx0             = _fjsp_setzero_v2r8();
823             fjy0             = _fjsp_setzero_v2r8();
824             fjz0             = _fjsp_setzero_v2r8();
825             fjx1             = _fjsp_setzero_v2r8();
826             fjy1             = _fjsp_setzero_v2r8();
827             fjz1             = _fjsp_setzero_v2r8();
828             fjx2             = _fjsp_setzero_v2r8();
829             fjy2             = _fjsp_setzero_v2r8();
830             fjz2             = _fjsp_setzero_v2r8();
831
832             /**************************
833              * CALCULATE INTERACTIONS *
834              **************************/
835
836             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
837             {
838
839             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
840
841             /* EWALD ELECTROSTATICS */
842
843             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
844             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
845             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
846             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
847             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
848
849             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
850             ewtabD           = _fjsp_setzero_v2r8();
851             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
852             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
853             ewtabFn          = _fjsp_setzero_v2r8();
854             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
855             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
856             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
857             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
858             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
859
860             /* LENNARD-JONES DISPERSION/REPULSION */
861
862             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
863             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
864             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
865             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
866                                            _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
867             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
868
869             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
870
871             /* Update potential sum for this i atom from the interaction with this j atom. */
872             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
873             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
874             velecsum         = _fjsp_add_v2r8(velecsum,velec);
875             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
876             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
877             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
878
879             fscal            = _fjsp_add_v2r8(felec,fvdw);
880
881             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
882
883             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
884
885             /* Update vectorial force */
886             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
887             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
888             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
889             
890             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
891             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
892             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
893
894             }
895
896             /**************************
897              * CALCULATE INTERACTIONS *
898              **************************/
899
900             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
901             {
902
903             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
904
905             /* EWALD ELECTROSTATICS */
906
907             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
908             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
909             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
910             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
911             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
912
913             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
914             ewtabD           = _fjsp_setzero_v2r8();
915             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
916             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
917             ewtabFn          = _fjsp_setzero_v2r8();
918             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
919             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
920             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
921             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
922             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
923
924             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
925
926             /* Update potential sum for this i atom from the interaction with this j atom. */
927             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
928             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
929             velecsum         = _fjsp_add_v2r8(velecsum,velec);
930
931             fscal            = felec;
932
933             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
934
935             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
936
937             /* Update vectorial force */
938             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
939             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
940             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
941             
942             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
943             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
944             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
945
946             }
947
948             /**************************
949              * CALCULATE INTERACTIONS *
950              **************************/
951
952             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
953             {
954
955             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
956
957             /* EWALD ELECTROSTATICS */
958
959             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
960             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
961             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
962             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
963             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
964
965             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
966             ewtabD           = _fjsp_setzero_v2r8();
967             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
968             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
969             ewtabFn          = _fjsp_setzero_v2r8();
970             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
971             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
972             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
973             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
974             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
975
976             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
977
978             /* Update potential sum for this i atom from the interaction with this j atom. */
979             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
980             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
981             velecsum         = _fjsp_add_v2r8(velecsum,velec);
982
983             fscal            = felec;
984
985             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
986
987             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
988
989             /* Update vectorial force */
990             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
991             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
992             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
993             
994             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
995             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
996             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
997
998             }
999
1000             /**************************
1001              * CALCULATE INTERACTIONS *
1002              **************************/
1003
1004             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1005             {
1006
1007             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1008
1009             /* EWALD ELECTROSTATICS */
1010
1011             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1012             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
1013             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1014             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1015             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1016
1017             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1018             ewtabD           = _fjsp_setzero_v2r8();
1019             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1020             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1021             ewtabFn          = _fjsp_setzero_v2r8();
1022             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1023             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1024             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1025             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
1026             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1027
1028             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1029
1030             /* Update potential sum for this i atom from the interaction with this j atom. */
1031             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1032             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1033             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1034
1035             fscal            = felec;
1036
1037             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1038
1039             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1040
1041             /* Update vectorial force */
1042             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1043             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1044             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1045             
1046             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1047             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1048             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1049
1050             }
1051
1052             /**************************
1053              * CALCULATE INTERACTIONS *
1054              **************************/
1055
1056             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1057             {
1058
1059             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1060
1061             /* EWALD ELECTROSTATICS */
1062
1063             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1064             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1065             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1066             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1067             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1068
1069             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1070             ewtabD           = _fjsp_setzero_v2r8();
1071             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1072             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1073             ewtabFn          = _fjsp_setzero_v2r8();
1074             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1075             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1076             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1077             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
1078             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1079
1080             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1081
1082             /* Update potential sum for this i atom from the interaction with this j atom. */
1083             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1084             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1085             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1086
1087             fscal            = felec;
1088
1089             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1090
1091             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1092
1093             /* Update vectorial force */
1094             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1095             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1096             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1097             
1098             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1099             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1100             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1101
1102             }
1103
1104             /**************************
1105              * CALCULATE INTERACTIONS *
1106              **************************/
1107
1108             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1109             {
1110
1111             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1112
1113             /* EWALD ELECTROSTATICS */
1114
1115             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1116             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1117             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1118             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1119             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1120
1121             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1122             ewtabD           = _fjsp_setzero_v2r8();
1123             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1124             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1125             ewtabFn          = _fjsp_setzero_v2r8();
1126             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1127             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1128             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1129             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
1130             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1131
1132             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1133
1134             /* Update potential sum for this i atom from the interaction with this j atom. */
1135             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1136             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1137             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1138
1139             fscal            = felec;
1140
1141             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1142
1143             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1144
1145             /* Update vectorial force */
1146             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1147             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1148             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1149             
1150             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1151             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1152             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1153
1154             }
1155
1156             /**************************
1157              * CALCULATE INTERACTIONS *
1158              **************************/
1159
1160             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1161             {
1162
1163             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1164
1165             /* EWALD ELECTROSTATICS */
1166
1167             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1168             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
1169             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1170             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1171             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1172
1173             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1174             ewtabD           = _fjsp_setzero_v2r8();
1175             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1176             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1177             ewtabFn          = _fjsp_setzero_v2r8();
1178             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1179             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1180             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1181             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
1182             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1183
1184             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1185
1186             /* Update potential sum for this i atom from the interaction with this j atom. */
1187             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1188             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1189             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1190
1191             fscal            = felec;
1192
1193             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1194
1195             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1196
1197             /* Update vectorial force */
1198             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1199             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1200             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1201             
1202             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1203             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1204             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1205
1206             }
1207
1208             /**************************
1209              * CALCULATE INTERACTIONS *
1210              **************************/
1211
1212             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1213             {
1214
1215             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1216
1217             /* EWALD ELECTROSTATICS */
1218
1219             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1220             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1221             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1222             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1223             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1224
1225             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1226             ewtabD           = _fjsp_setzero_v2r8();
1227             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1228             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1229             ewtabFn          = _fjsp_setzero_v2r8();
1230             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1231             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1232             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1233             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
1234             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1235
1236             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1237
1238             /* Update potential sum for this i atom from the interaction with this j atom. */
1239             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1240             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1241             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1242
1243             fscal            = felec;
1244
1245             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1246
1247             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1248
1249             /* Update vectorial force */
1250             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1251             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1252             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1253             
1254             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1255             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1256             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1257
1258             }
1259
1260             /**************************
1261              * CALCULATE INTERACTIONS *
1262              **************************/
1263
1264             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1265             {
1266
1267             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1268
1269             /* EWALD ELECTROSTATICS */
1270
1271             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1272             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1273             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1274             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1275             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1276
1277             ewtabF           = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1278             ewtabD           = _fjsp_setzero_v2r8();
1279             GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1280             ewtabV           = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1281             ewtabFn          = _fjsp_setzero_v2r8();
1282             GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1283             felec            = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1284             velec            = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1285             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
1286             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1287
1288             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1289
1290             /* Update potential sum for this i atom from the interaction with this j atom. */
1291             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1292             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1293             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1294
1295             fscal            = felec;
1296
1297             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1298
1299             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1300
1301             /* Update vectorial force */
1302             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1303             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1304             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1305             
1306             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1307             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1308             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1309
1310             }
1311
1312             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1313
1314             /* Inner loop uses 459 flops */
1315         }
1316
1317         /* End of innermost loop */
1318
1319         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1320                                               f+i_coord_offset,fshift+i_shift_offset);
1321
1322         ggid                        = gid[iidx];
1323         /* Update potential energies */
1324         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1325         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1326
1327         /* Increment number of inner iterations */
1328         inneriter                  += j_index_end - j_index_start;
1329
1330         /* Outer loop uses 20 flops */
1331     }
1332
1333     /* Increment number of outer iterations */
1334     outeriter        += nri;
1335
1336     /* Update outer/inner flops */
1337
1338     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*459);
1339 }
1340 /*
1341  * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
1342  * Electrostatics interaction: Ewald
1343  * VdW interaction:            LennardJones
1344  * Geometry:                   Water3-Water3
1345  * Calculate force/pot:        Force
1346  */
1347 void
1348 nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
1349                     (t_nblist                    * gmx_restrict       nlist,
1350                      rvec                        * gmx_restrict          xx,
1351                      rvec                        * gmx_restrict          ff,
1352                      t_forcerec                  * gmx_restrict          fr,
1353                      t_mdatoms                   * gmx_restrict     mdatoms,
1354                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1355                      t_nrnb                      * gmx_restrict        nrnb)
1356 {
1357     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1358      * just 0 for non-waters.
1359      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1360      * jnr indices corresponding to data put in the two positions in the SIMD register.
1361      */
1362     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1363     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1364     int              jnrA,jnrB;
1365     int              j_coord_offsetA,j_coord_offsetB;
1366     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1367     real             rcutoff_scalar;
1368     real             *shiftvec,*fshift,*x,*f;
1369     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1370     int              vdwioffset0;
1371     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1372     int              vdwioffset1;
1373     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1374     int              vdwioffset2;
1375     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1376     int              vdwjidx0A,vdwjidx0B;
1377     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1378     int              vdwjidx1A,vdwjidx1B;
1379     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1380     int              vdwjidx2A,vdwjidx2B;
1381     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1382     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1383     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1384     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1385     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1386     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1387     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1388     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1389     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1390     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1391     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1392     real             *charge;
1393     int              nvdwtype;
1394     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1395     int              *vdwtype;
1396     real             *vdwparam;
1397     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1398     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1399     _fjsp_v2r8       ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1400     real             *ewtab;
1401     _fjsp_v2r8       itab_tmp;
1402     _fjsp_v2r8       dummy_mask,cutoff_mask;
1403     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1404     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1405     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1406
1407     x                = xx[0];
1408     f                = ff[0];
1409
1410     nri              = nlist->nri;
1411     iinr             = nlist->iinr;
1412     jindex           = nlist->jindex;
1413     jjnr             = nlist->jjnr;
1414     shiftidx         = nlist->shift;
1415     gid              = nlist->gid;
1416     shiftvec         = fr->shift_vec[0];
1417     fshift           = fr->fshift[0];
1418     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1419     charge           = mdatoms->chargeA;
1420     nvdwtype         = fr->ntype;
1421     vdwparam         = fr->nbfp;
1422     vdwtype          = mdatoms->typeA;
1423
1424     sh_ewald         = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1425     ewtab            = fr->ic->tabq_coul_F;
1426     ewtabscale       = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1427     ewtabhalfspace   = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1428
1429     /* Setup water-specific parameters */
1430     inr              = nlist->iinr[0];
1431     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1432     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1433     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1434     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1435
1436     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1437     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1438     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1439     vdwjidx0A        = 2*vdwtype[inr+0];
1440     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1441     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1442     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1443     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1444     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1445     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1446     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1447     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1448     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1449     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1450     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1451
1452     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1453     rcutoff_scalar   = fr->rcoulomb;
1454     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1455     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
1456
1457     sh_vdw_invrcut6  = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
1458     rvdw             = gmx_fjsp_set1_v2r8(fr->rvdw);
1459
1460     /* Avoid stupid compiler warnings */
1461     jnrA = jnrB = 0;
1462     j_coord_offsetA = 0;
1463     j_coord_offsetB = 0;
1464
1465     outeriter        = 0;
1466     inneriter        = 0;
1467
1468     /* Start outer loop over neighborlists */
1469     for(iidx=0; iidx<nri; iidx++)
1470     {
1471         /* Load shift vector for this list */
1472         i_shift_offset   = DIM*shiftidx[iidx];
1473
1474         /* Load limits for loop over neighbors */
1475         j_index_start    = jindex[iidx];
1476         j_index_end      = jindex[iidx+1];
1477
1478         /* Get outer coordinate index */
1479         inr              = iinr[iidx];
1480         i_coord_offset   = DIM*inr;
1481
1482         /* Load i particle coords and add shift vector */
1483         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1484                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1485
1486         fix0             = _fjsp_setzero_v2r8();
1487         fiy0             = _fjsp_setzero_v2r8();
1488         fiz0             = _fjsp_setzero_v2r8();
1489         fix1             = _fjsp_setzero_v2r8();
1490         fiy1             = _fjsp_setzero_v2r8();
1491         fiz1             = _fjsp_setzero_v2r8();
1492         fix2             = _fjsp_setzero_v2r8();
1493         fiy2             = _fjsp_setzero_v2r8();
1494         fiz2             = _fjsp_setzero_v2r8();
1495
1496         /* Start inner kernel loop */
1497         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1498         {
1499
1500             /* Get j neighbor index, and coordinate index */
1501             jnrA             = jjnr[jidx];
1502             jnrB             = jjnr[jidx+1];
1503             j_coord_offsetA  = DIM*jnrA;
1504             j_coord_offsetB  = DIM*jnrB;
1505
1506             /* load j atom coordinates */
1507             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1508                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1509
1510             /* Calculate displacement vector */
1511             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1512             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1513             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1514             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1515             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1516             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1517             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1518             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1519             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1520             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1521             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1522             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1523             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1524             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1525             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1526             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1527             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1528             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1529             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1530             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1531             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1532             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1533             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1534             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1535             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1536             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1537             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1538
1539             /* Calculate squared distance and things based on it */
1540             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1541             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1542             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1543             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1544             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1545             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1546             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1547             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1548             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1549
1550             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1551             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1552             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1553             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1554             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1555             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1556             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1557             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1558             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1559
1560             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1561             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1562             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1563             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1564             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1565             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1566             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1567             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1568             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1569
1570             fjx0             = _fjsp_setzero_v2r8();
1571             fjy0             = _fjsp_setzero_v2r8();
1572             fjz0             = _fjsp_setzero_v2r8();
1573             fjx1             = _fjsp_setzero_v2r8();
1574             fjy1             = _fjsp_setzero_v2r8();
1575             fjz1             = _fjsp_setzero_v2r8();
1576             fjx2             = _fjsp_setzero_v2r8();
1577             fjy2             = _fjsp_setzero_v2r8();
1578             fjz2             = _fjsp_setzero_v2r8();
1579
1580             /**************************
1581              * CALCULATE INTERACTIONS *
1582              **************************/
1583
1584             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1585             {
1586
1587             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1588
1589             /* EWALD ELECTROSTATICS */
1590
1591             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1592             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
1593             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1594             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1595             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1596
1597             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1598                                          &ewtabF,&ewtabFn);
1599             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1600             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1601
1602             /* LENNARD-JONES DISPERSION/REPULSION */
1603
1604             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1605             fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1606
1607             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1608
1609             fscal            = _fjsp_add_v2r8(felec,fvdw);
1610
1611             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1612
1613             /* Update vectorial force */
1614             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1615             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1616             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1617             
1618             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1619             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1620             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1621
1622             }
1623
1624             /**************************
1625              * CALCULATE INTERACTIONS *
1626              **************************/
1627
1628             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1629             {
1630
1631             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
1632
1633             /* EWALD ELECTROSTATICS */
1634
1635             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1636             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
1637             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1638             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1639             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1640
1641             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1642                                          &ewtabF,&ewtabFn);
1643             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1644             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1645
1646             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1647
1648             fscal            = felec;
1649
1650             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1651
1652             /* Update vectorial force */
1653             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1654             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1655             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1656             
1657             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1658             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1659             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1660
1661             }
1662
1663             /**************************
1664              * CALCULATE INTERACTIONS *
1665              **************************/
1666
1667             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1668             {
1669
1670             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
1671
1672             /* EWALD ELECTROSTATICS */
1673
1674             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1675             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
1676             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1677             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1678             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1679
1680             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1681                                          &ewtabF,&ewtabFn);
1682             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1683             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1684
1685             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1686
1687             fscal            = felec;
1688
1689             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1690
1691             /* Update vectorial force */
1692             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1693             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1694             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1695             
1696             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1697             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1698             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1699
1700             }
1701
1702             /**************************
1703              * CALCULATE INTERACTIONS *
1704              **************************/
1705
1706             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1707             {
1708
1709             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
1710
1711             /* EWALD ELECTROSTATICS */
1712
1713             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1714             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
1715             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1716             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1717             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1718
1719             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1720                                          &ewtabF,&ewtabFn);
1721             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1722             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1723
1724             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1725
1726             fscal            = felec;
1727
1728             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1729
1730             /* Update vectorial force */
1731             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1732             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1733             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1734             
1735             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1736             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1737             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1738
1739             }
1740
1741             /**************************
1742              * CALCULATE INTERACTIONS *
1743              **************************/
1744
1745             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1746             {
1747
1748             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
1749
1750             /* EWALD ELECTROSTATICS */
1751
1752             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1753             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
1754             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1755             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1756             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1757
1758             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1759                                          &ewtabF,&ewtabFn);
1760             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1761             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1762
1763             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1764
1765             fscal            = felec;
1766
1767             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1768
1769             /* Update vectorial force */
1770             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1771             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1772             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1773             
1774             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1775             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1776             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1777
1778             }
1779
1780             /**************************
1781              * CALCULATE INTERACTIONS *
1782              **************************/
1783
1784             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1785             {
1786
1787             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
1788
1789             /* EWALD ELECTROSTATICS */
1790
1791             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1792             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
1793             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1794             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1795             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1796
1797             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1798                                          &ewtabF,&ewtabFn);
1799             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1800             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1801
1802             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1803
1804             fscal            = felec;
1805
1806             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1807
1808             /* Update vectorial force */
1809             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1810             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1811             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1812             
1813             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1814             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1815             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1816
1817             }
1818
1819             /**************************
1820              * CALCULATE INTERACTIONS *
1821              **************************/
1822
1823             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1824             {
1825
1826             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
1827
1828             /* EWALD ELECTROSTATICS */
1829
1830             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1831             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
1832             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1833             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1834             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1835
1836             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1837                                          &ewtabF,&ewtabFn);
1838             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1839             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1840
1841             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1842
1843             fscal            = felec;
1844
1845             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1846
1847             /* Update vectorial force */
1848             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1849             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1850             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1851             
1852             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1853             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1854             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1855
1856             }
1857
1858             /**************************
1859              * CALCULATE INTERACTIONS *
1860              **************************/
1861
1862             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1863             {
1864
1865             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
1866
1867             /* EWALD ELECTROSTATICS */
1868
1869             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1870             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
1871             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1872             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1873             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1874
1875             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1876                                          &ewtabF,&ewtabFn);
1877             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1878             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1879
1880             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1881
1882             fscal            = felec;
1883
1884             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1885
1886             /* Update vectorial force */
1887             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1888             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1889             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1890             
1891             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1892             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1893             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1894
1895             }
1896
1897             /**************************
1898              * CALCULATE INTERACTIONS *
1899              **************************/
1900
1901             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1902             {
1903
1904             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
1905
1906             /* EWALD ELECTROSTATICS */
1907
1908             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1909             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
1910             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
1911             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1912             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1913
1914             gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1915                                          &ewtabF,&ewtabFn);
1916             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1917             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1918
1919             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1920
1921             fscal            = felec;
1922
1923             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1924
1925             /* Update vectorial force */
1926             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1927             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1928             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1929             
1930             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1931             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1932             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1933
1934             }
1935
1936             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1937
1938             /* Inner loop uses 385 flops */
1939         }
1940
1941         if(jidx<j_index_end)
1942         {
1943
1944             jnrA             = jjnr[jidx];
1945             j_coord_offsetA  = DIM*jnrA;
1946
1947             /* load j atom coordinates */
1948             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1949                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1950
1951             /* Calculate displacement vector */
1952             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1953             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1954             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1955             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1956             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1957             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1958             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1959             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1960             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1961             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1962             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1963             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1964             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1965             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1966             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1967             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1968             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1969             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1970             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1971             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1972             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1973             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1974             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1975             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1976             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1977             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1978             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1979
1980             /* Calculate squared distance and things based on it */
1981             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1982             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1983             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1984             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1985             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1986             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1987             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1988             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1989             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1990
1991             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1992             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1993             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1994             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1995             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1996             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1997             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1998             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1999             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
2000
2001             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
2002             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
2003             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
2004             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
2005             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
2006             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
2007             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
2008             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
2009             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
2010
2011             fjx0             = _fjsp_setzero_v2r8();
2012             fjy0             = _fjsp_setzero_v2r8();
2013             fjz0             = _fjsp_setzero_v2r8();
2014             fjx1             = _fjsp_setzero_v2r8();
2015             fjy1             = _fjsp_setzero_v2r8();
2016             fjz1             = _fjsp_setzero_v2r8();
2017             fjx2             = _fjsp_setzero_v2r8();
2018             fjy2             = _fjsp_setzero_v2r8();
2019             fjz2             = _fjsp_setzero_v2r8();
2020
2021             /**************************
2022              * CALCULATE INTERACTIONS *
2023              **************************/
2024
2025             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
2026             {
2027
2028             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
2029
2030             /* EWALD ELECTROSTATICS */
2031
2032             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2033             ewrt             = _fjsp_mul_v2r8(r00,ewtabscale);
2034             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2035             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2036             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2037
2038             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2039             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2040             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
2041
2042             /* LENNARD-JONES DISPERSION/REPULSION */
2043
2044             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
2045             fvdw             = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
2046
2047             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
2048
2049             fscal            = _fjsp_add_v2r8(felec,fvdw);
2050
2051             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2052
2053             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2054
2055             /* Update vectorial force */
2056             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
2057             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
2058             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
2059             
2060             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
2061             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
2062             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
2063
2064             }
2065
2066             /**************************
2067              * CALCULATE INTERACTIONS *
2068              **************************/
2069
2070             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
2071             {
2072
2073             r01              = _fjsp_mul_v2r8(rsq01,rinv01);
2074
2075             /* EWALD ELECTROSTATICS */
2076
2077             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2078             ewrt             = _fjsp_mul_v2r8(r01,ewtabscale);
2079             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2080             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2081             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2082
2083             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2084             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2085             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
2086
2087             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
2088
2089             fscal            = felec;
2090
2091             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2092
2093             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2094
2095             /* Update vectorial force */
2096             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
2097             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
2098             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
2099             
2100             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
2101             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
2102             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
2103
2104             }
2105
2106             /**************************
2107              * CALCULATE INTERACTIONS *
2108              **************************/
2109
2110             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
2111             {
2112
2113             r02              = _fjsp_mul_v2r8(rsq02,rinv02);
2114
2115             /* EWALD ELECTROSTATICS */
2116
2117             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2118             ewrt             = _fjsp_mul_v2r8(r02,ewtabscale);
2119             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2120             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2121             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2122
2123             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2124             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2125             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
2126
2127             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
2128
2129             fscal            = felec;
2130
2131             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2132
2133             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2134
2135             /* Update vectorial force */
2136             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
2137             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
2138             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
2139             
2140             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
2141             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
2142             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
2143
2144             }
2145
2146             /**************************
2147              * CALCULATE INTERACTIONS *
2148              **************************/
2149
2150             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
2151             {
2152
2153             r10              = _fjsp_mul_v2r8(rsq10,rinv10);
2154
2155             /* EWALD ELECTROSTATICS */
2156
2157             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2158             ewrt             = _fjsp_mul_v2r8(r10,ewtabscale);
2159             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2160             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2161             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2162
2163             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2164             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2165             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
2166
2167             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
2168
2169             fscal            = felec;
2170
2171             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2172
2173             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2174
2175             /* Update vectorial force */
2176             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
2177             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2178             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2179             
2180             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2181             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2182             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2183
2184             }
2185
2186             /**************************
2187              * CALCULATE INTERACTIONS *
2188              **************************/
2189
2190             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
2191             {
2192
2193             r11              = _fjsp_mul_v2r8(rsq11,rinv11);
2194
2195             /* EWALD ELECTROSTATICS */
2196
2197             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2198             ewrt             = _fjsp_mul_v2r8(r11,ewtabscale);
2199             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2200             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2201             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2202
2203             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2204             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2205             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
2206
2207             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
2208
2209             fscal            = felec;
2210
2211             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2212
2213             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2214
2215             /* Update vectorial force */
2216             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
2217             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2218             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2219             
2220             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2221             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2222             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2223
2224             }
2225
2226             /**************************
2227              * CALCULATE INTERACTIONS *
2228              **************************/
2229
2230             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
2231             {
2232
2233             r12              = _fjsp_mul_v2r8(rsq12,rinv12);
2234
2235             /* EWALD ELECTROSTATICS */
2236
2237             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2238             ewrt             = _fjsp_mul_v2r8(r12,ewtabscale);
2239             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2240             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2241             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2242
2243             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2244             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2245             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
2246
2247             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
2248
2249             fscal            = felec;
2250
2251             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2252
2253             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2254
2255             /* Update vectorial force */
2256             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
2257             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2258             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2259             
2260             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2261             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2262             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2263
2264             }
2265
2266             /**************************
2267              * CALCULATE INTERACTIONS *
2268              **************************/
2269
2270             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
2271             {
2272
2273             r20              = _fjsp_mul_v2r8(rsq20,rinv20);
2274
2275             /* EWALD ELECTROSTATICS */
2276
2277             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2278             ewrt             = _fjsp_mul_v2r8(r20,ewtabscale);
2279             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2280             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2281             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2282
2283             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2284             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2285             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
2286
2287             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
2288
2289             fscal            = felec;
2290
2291             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2292
2293             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2294
2295             /* Update vectorial force */
2296             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
2297             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2298             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2299             
2300             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2301             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2302             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2303
2304             }
2305
2306             /**************************
2307              * CALCULATE INTERACTIONS *
2308              **************************/
2309
2310             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
2311             {
2312
2313             r21              = _fjsp_mul_v2r8(rsq21,rinv21);
2314
2315             /* EWALD ELECTROSTATICS */
2316
2317             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2318             ewrt             = _fjsp_mul_v2r8(r21,ewtabscale);
2319             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2320             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2321             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2322
2323             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2324             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2325             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
2326
2327             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
2328
2329             fscal            = felec;
2330
2331             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2332
2333             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2334
2335             /* Update vectorial force */
2336             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
2337             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2338             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2339             
2340             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2341             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2342             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2343
2344             }
2345
2346             /**************************
2347              * CALCULATE INTERACTIONS *
2348              **************************/
2349
2350             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
2351             {
2352
2353             r22              = _fjsp_mul_v2r8(rsq22,rinv22);
2354
2355             /* EWALD ELECTROSTATICS */
2356
2357             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2358             ewrt             = _fjsp_mul_v2r8(r22,ewtabscale);
2359             itab_tmp         = _fjsp_dtox_v2r8(ewrt);
2360             eweps            = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2361             _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2362
2363             gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2364             felec            = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2365             felec            = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2366
2367             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
2368
2369             fscal            = felec;
2370
2371             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2372
2373             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2374
2375             /* Update vectorial force */
2376             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
2377             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2378             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2379             
2380             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2381             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2382             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2383
2384             }
2385
2386             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2387
2388             /* Inner loop uses 385 flops */
2389         }
2390
2391         /* End of innermost loop */
2392
2393         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2394                                               f+i_coord_offset,fshift+i_shift_offset);
2395
2396         /* Increment number of inner iterations */
2397         inneriter                  += j_index_end - j_index_start;
2398
2399         /* Outer loop uses 18 flops */
2400     }
2401
2402     /* Increment number of outer iterations */
2403     outeriter        += nri;
2404
2405     /* Update outer/inner flops */
2406
2407     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*385);
2408 }