Merge release-5-0 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "gmxpre.h"
39
40 #include "config.h"
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/legacyheaders/nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: ReactionField
54  * VdW interaction:            CubicSplineTable
55  * Geometry:                   Water4-Water4
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwioffset3;
88     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
89     int              vdwjidx0A,vdwjidx0B;
90     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
91     int              vdwjidx1A,vdwjidx1B;
92     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
93     int              vdwjidx2A,vdwjidx2B;
94     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
95     int              vdwjidx3A,vdwjidx3B;
96     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
97     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
98     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
99     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
100     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
101     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
102     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
103     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
104     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
105     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
106     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
107     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
108     real             *charge;
109     int              nvdwtype;
110     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
111     int              *vdwtype;
112     real             *vdwparam;
113     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
114     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
115     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
116     real             *vftab;
117     _fjsp_v2r8       itab_tmp;
118     _fjsp_v2r8       dummy_mask,cutoff_mask;
119     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
120     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
121     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
122
123     x                = xx[0];
124     f                = ff[0];
125
126     nri              = nlist->nri;
127     iinr             = nlist->iinr;
128     jindex           = nlist->jindex;
129     jjnr             = nlist->jjnr;
130     shiftidx         = nlist->shift;
131     gid              = nlist->gid;
132     shiftvec         = fr->shift_vec[0];
133     fshift           = fr->fshift[0];
134     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
135     charge           = mdatoms->chargeA;
136     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
137     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
138     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
139     nvdwtype         = fr->ntype;
140     vdwparam         = fr->nbfp;
141     vdwtype          = mdatoms->typeA;
142
143     vftab            = kernel_data->table_vdw->data;
144     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
145
146     /* Setup water-specific parameters */
147     inr              = nlist->iinr[0];
148     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
149     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
150     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
151     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
152
153     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
154     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
155     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
156     vdwjidx0A        = 2*vdwtype[inr+0];
157     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
158     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
159     qq11             = _fjsp_mul_v2r8(iq1,jq1);
160     qq12             = _fjsp_mul_v2r8(iq1,jq2);
161     qq13             = _fjsp_mul_v2r8(iq1,jq3);
162     qq21             = _fjsp_mul_v2r8(iq2,jq1);
163     qq22             = _fjsp_mul_v2r8(iq2,jq2);
164     qq23             = _fjsp_mul_v2r8(iq2,jq3);
165     qq31             = _fjsp_mul_v2r8(iq3,jq1);
166     qq32             = _fjsp_mul_v2r8(iq3,jq2);
167     qq33             = _fjsp_mul_v2r8(iq3,jq3);
168
169     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
170     rcutoff_scalar   = fr->rcoulomb;
171     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
172     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
173
174     /* Avoid stupid compiler warnings */
175     jnrA = jnrB = 0;
176     j_coord_offsetA = 0;
177     j_coord_offsetB = 0;
178
179     outeriter        = 0;
180     inneriter        = 0;
181
182     /* Start outer loop over neighborlists */
183     for(iidx=0; iidx<nri; iidx++)
184     {
185         /* Load shift vector for this list */
186         i_shift_offset   = DIM*shiftidx[iidx];
187
188         /* Load limits for loop over neighbors */
189         j_index_start    = jindex[iidx];
190         j_index_end      = jindex[iidx+1];
191
192         /* Get outer coordinate index */
193         inr              = iinr[iidx];
194         i_coord_offset   = DIM*inr;
195
196         /* Load i particle coords and add shift vector */
197         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
198                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
199
200         fix0             = _fjsp_setzero_v2r8();
201         fiy0             = _fjsp_setzero_v2r8();
202         fiz0             = _fjsp_setzero_v2r8();
203         fix1             = _fjsp_setzero_v2r8();
204         fiy1             = _fjsp_setzero_v2r8();
205         fiz1             = _fjsp_setzero_v2r8();
206         fix2             = _fjsp_setzero_v2r8();
207         fiy2             = _fjsp_setzero_v2r8();
208         fiz2             = _fjsp_setzero_v2r8();
209         fix3             = _fjsp_setzero_v2r8();
210         fiy3             = _fjsp_setzero_v2r8();
211         fiz3             = _fjsp_setzero_v2r8();
212
213         /* Reset potential sums */
214         velecsum         = _fjsp_setzero_v2r8();
215         vvdwsum          = _fjsp_setzero_v2r8();
216
217         /* Start inner kernel loop */
218         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
219         {
220
221             /* Get j neighbor index, and coordinate index */
222             jnrA             = jjnr[jidx];
223             jnrB             = jjnr[jidx+1];
224             j_coord_offsetA  = DIM*jnrA;
225             j_coord_offsetB  = DIM*jnrB;
226
227             /* load j atom coordinates */
228             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
229                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
230                                               &jy2,&jz2,&jx3,&jy3,&jz3);
231
232             /* Calculate displacement vector */
233             dx00             = _fjsp_sub_v2r8(ix0,jx0);
234             dy00             = _fjsp_sub_v2r8(iy0,jy0);
235             dz00             = _fjsp_sub_v2r8(iz0,jz0);
236             dx11             = _fjsp_sub_v2r8(ix1,jx1);
237             dy11             = _fjsp_sub_v2r8(iy1,jy1);
238             dz11             = _fjsp_sub_v2r8(iz1,jz1);
239             dx12             = _fjsp_sub_v2r8(ix1,jx2);
240             dy12             = _fjsp_sub_v2r8(iy1,jy2);
241             dz12             = _fjsp_sub_v2r8(iz1,jz2);
242             dx13             = _fjsp_sub_v2r8(ix1,jx3);
243             dy13             = _fjsp_sub_v2r8(iy1,jy3);
244             dz13             = _fjsp_sub_v2r8(iz1,jz3);
245             dx21             = _fjsp_sub_v2r8(ix2,jx1);
246             dy21             = _fjsp_sub_v2r8(iy2,jy1);
247             dz21             = _fjsp_sub_v2r8(iz2,jz1);
248             dx22             = _fjsp_sub_v2r8(ix2,jx2);
249             dy22             = _fjsp_sub_v2r8(iy2,jy2);
250             dz22             = _fjsp_sub_v2r8(iz2,jz2);
251             dx23             = _fjsp_sub_v2r8(ix2,jx3);
252             dy23             = _fjsp_sub_v2r8(iy2,jy3);
253             dz23             = _fjsp_sub_v2r8(iz2,jz3);
254             dx31             = _fjsp_sub_v2r8(ix3,jx1);
255             dy31             = _fjsp_sub_v2r8(iy3,jy1);
256             dz31             = _fjsp_sub_v2r8(iz3,jz1);
257             dx32             = _fjsp_sub_v2r8(ix3,jx2);
258             dy32             = _fjsp_sub_v2r8(iy3,jy2);
259             dz32             = _fjsp_sub_v2r8(iz3,jz2);
260             dx33             = _fjsp_sub_v2r8(ix3,jx3);
261             dy33             = _fjsp_sub_v2r8(iy3,jy3);
262             dz33             = _fjsp_sub_v2r8(iz3,jz3);
263
264             /* Calculate squared distance and things based on it */
265             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
266             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
267             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
268             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
269             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
270             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
271             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
272             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
273             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
274             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
275
276             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
277             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
278             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
279             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
280             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
281             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
282             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
283             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
284             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
285             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
286
287             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
288             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
289             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
290             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
291             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
292             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
293             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
294             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
295             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
296
297             fjx0             = _fjsp_setzero_v2r8();
298             fjy0             = _fjsp_setzero_v2r8();
299             fjz0             = _fjsp_setzero_v2r8();
300             fjx1             = _fjsp_setzero_v2r8();
301             fjy1             = _fjsp_setzero_v2r8();
302             fjz1             = _fjsp_setzero_v2r8();
303             fjx2             = _fjsp_setzero_v2r8();
304             fjy2             = _fjsp_setzero_v2r8();
305             fjz2             = _fjsp_setzero_v2r8();
306             fjx3             = _fjsp_setzero_v2r8();
307             fjy3             = _fjsp_setzero_v2r8();
308             fjz3             = _fjsp_setzero_v2r8();
309
310             /**************************
311              * CALCULATE INTERACTIONS *
312              **************************/
313
314             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
315
316             /* Calculate table index by multiplying r with table scale and truncate to integer */
317             rt               = _fjsp_mul_v2r8(r00,vftabscale);
318             itab_tmp         = _fjsp_dtox_v2r8(rt);
319             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
320             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
321             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
322
323             vfconv.i[0]     *= 8;
324             vfconv.i[1]     *= 8;
325
326             /* CUBIC SPLINE TABLE DISPERSION */
327             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
328             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
329             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
330             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
331             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
332             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
333             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
334             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
335             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
336             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
337             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
338
339             /* CUBIC SPLINE TABLE REPULSION */
340             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
341             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
342             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
343             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
344             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
345             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
346             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
347             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
348             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
349             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
350             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
351             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
352             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
353
354             /* Update potential sum for this i atom from the interaction with this j atom. */
355             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
356
357             fscal            = fvdw;
358
359             /* Update vectorial force */
360             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
361             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
362             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
363             
364             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
365             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
366             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
367
368             /**************************
369              * CALCULATE INTERACTIONS *
370              **************************/
371
372             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
373             {
374
375             /* REACTION-FIELD ELECTROSTATICS */
376             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
377             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
378
379             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
380
381             /* Update potential sum for this i atom from the interaction with this j atom. */
382             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
383             velecsum         = _fjsp_add_v2r8(velecsum,velec);
384
385             fscal            = felec;
386
387             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
388
389             /* Update vectorial force */
390             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
391             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
392             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
393             
394             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
395             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
396             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
397
398             }
399
400             /**************************
401              * CALCULATE INTERACTIONS *
402              **************************/
403
404             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
405             {
406
407             /* REACTION-FIELD ELECTROSTATICS */
408             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
409             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
410
411             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
412
413             /* Update potential sum for this i atom from the interaction with this j atom. */
414             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
415             velecsum         = _fjsp_add_v2r8(velecsum,velec);
416
417             fscal            = felec;
418
419             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
420
421             /* Update vectorial force */
422             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
423             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
424             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
425             
426             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
427             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
428             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
429
430             }
431
432             /**************************
433              * CALCULATE INTERACTIONS *
434              **************************/
435
436             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
437             {
438
439             /* REACTION-FIELD ELECTROSTATICS */
440             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
441             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
442
443             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
444
445             /* Update potential sum for this i atom from the interaction with this j atom. */
446             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
447             velecsum         = _fjsp_add_v2r8(velecsum,velec);
448
449             fscal            = felec;
450
451             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
452
453             /* Update vectorial force */
454             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
455             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
456             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
457             
458             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
459             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
460             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
461
462             }
463
464             /**************************
465              * CALCULATE INTERACTIONS *
466              **************************/
467
468             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
469             {
470
471             /* REACTION-FIELD ELECTROSTATICS */
472             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
473             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
474
475             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
476
477             /* Update potential sum for this i atom from the interaction with this j atom. */
478             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
479             velecsum         = _fjsp_add_v2r8(velecsum,velec);
480
481             fscal            = felec;
482
483             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
484
485             /* Update vectorial force */
486             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
487             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
488             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
489             
490             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
491             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
492             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
493
494             }
495
496             /**************************
497              * CALCULATE INTERACTIONS *
498              **************************/
499
500             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
501             {
502
503             /* REACTION-FIELD ELECTROSTATICS */
504             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
505             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
506
507             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
508
509             /* Update potential sum for this i atom from the interaction with this j atom. */
510             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
511             velecsum         = _fjsp_add_v2r8(velecsum,velec);
512
513             fscal            = felec;
514
515             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
516
517             /* Update vectorial force */
518             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
519             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
520             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
521             
522             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
523             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
524             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
525
526             }
527
528             /**************************
529              * CALCULATE INTERACTIONS *
530              **************************/
531
532             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
533             {
534
535             /* REACTION-FIELD ELECTROSTATICS */
536             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
537             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
538
539             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
540
541             /* Update potential sum for this i atom from the interaction with this j atom. */
542             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
543             velecsum         = _fjsp_add_v2r8(velecsum,velec);
544
545             fscal            = felec;
546
547             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
548
549             /* Update vectorial force */
550             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
551             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
552             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
553             
554             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
555             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
556             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
557
558             }
559
560             /**************************
561              * CALCULATE INTERACTIONS *
562              **************************/
563
564             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
565             {
566
567             /* REACTION-FIELD ELECTROSTATICS */
568             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
569             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
570
571             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
572
573             /* Update potential sum for this i atom from the interaction with this j atom. */
574             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
575             velecsum         = _fjsp_add_v2r8(velecsum,velec);
576
577             fscal            = felec;
578
579             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
580
581             /* Update vectorial force */
582             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
583             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
584             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
585             
586             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
587             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
588             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
589
590             }
591
592             /**************************
593              * CALCULATE INTERACTIONS *
594              **************************/
595
596             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
597             {
598
599             /* REACTION-FIELD ELECTROSTATICS */
600             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
601             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
602
603             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
604
605             /* Update potential sum for this i atom from the interaction with this j atom. */
606             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
607             velecsum         = _fjsp_add_v2r8(velecsum,velec);
608
609             fscal            = felec;
610
611             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
612
613             /* Update vectorial force */
614             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
615             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
616             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
617             
618             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
619             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
620             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
621
622             }
623
624             /**************************
625              * CALCULATE INTERACTIONS *
626              **************************/
627
628             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
629             {
630
631             /* REACTION-FIELD ELECTROSTATICS */
632             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
633             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
634
635             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
636
637             /* Update potential sum for this i atom from the interaction with this j atom. */
638             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
639             velecsum         = _fjsp_add_v2r8(velecsum,velec);
640
641             fscal            = felec;
642
643             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
644
645             /* Update vectorial force */
646             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
647             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
648             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
649             
650             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
651             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
652             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
653
654             }
655
656             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
657
658             /* Inner loop uses 413 flops */
659         }
660
661         if(jidx<j_index_end)
662         {
663
664             jnrA             = jjnr[jidx];
665             j_coord_offsetA  = DIM*jnrA;
666
667             /* load j atom coordinates */
668             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
669                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
670                                               &jy2,&jz2,&jx3,&jy3,&jz3);
671
672             /* Calculate displacement vector */
673             dx00             = _fjsp_sub_v2r8(ix0,jx0);
674             dy00             = _fjsp_sub_v2r8(iy0,jy0);
675             dz00             = _fjsp_sub_v2r8(iz0,jz0);
676             dx11             = _fjsp_sub_v2r8(ix1,jx1);
677             dy11             = _fjsp_sub_v2r8(iy1,jy1);
678             dz11             = _fjsp_sub_v2r8(iz1,jz1);
679             dx12             = _fjsp_sub_v2r8(ix1,jx2);
680             dy12             = _fjsp_sub_v2r8(iy1,jy2);
681             dz12             = _fjsp_sub_v2r8(iz1,jz2);
682             dx13             = _fjsp_sub_v2r8(ix1,jx3);
683             dy13             = _fjsp_sub_v2r8(iy1,jy3);
684             dz13             = _fjsp_sub_v2r8(iz1,jz3);
685             dx21             = _fjsp_sub_v2r8(ix2,jx1);
686             dy21             = _fjsp_sub_v2r8(iy2,jy1);
687             dz21             = _fjsp_sub_v2r8(iz2,jz1);
688             dx22             = _fjsp_sub_v2r8(ix2,jx2);
689             dy22             = _fjsp_sub_v2r8(iy2,jy2);
690             dz22             = _fjsp_sub_v2r8(iz2,jz2);
691             dx23             = _fjsp_sub_v2r8(ix2,jx3);
692             dy23             = _fjsp_sub_v2r8(iy2,jy3);
693             dz23             = _fjsp_sub_v2r8(iz2,jz3);
694             dx31             = _fjsp_sub_v2r8(ix3,jx1);
695             dy31             = _fjsp_sub_v2r8(iy3,jy1);
696             dz31             = _fjsp_sub_v2r8(iz3,jz1);
697             dx32             = _fjsp_sub_v2r8(ix3,jx2);
698             dy32             = _fjsp_sub_v2r8(iy3,jy2);
699             dz32             = _fjsp_sub_v2r8(iz3,jz2);
700             dx33             = _fjsp_sub_v2r8(ix3,jx3);
701             dy33             = _fjsp_sub_v2r8(iy3,jy3);
702             dz33             = _fjsp_sub_v2r8(iz3,jz3);
703
704             /* Calculate squared distance and things based on it */
705             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
706             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
707             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
708             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
709             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
710             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
711             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
712             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
713             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
714             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
715
716             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
717             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
718             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
719             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
720             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
721             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
722             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
723             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
724             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
725             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
726
727             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
728             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
729             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
730             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
731             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
732             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
733             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
734             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
735             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
736
737             fjx0             = _fjsp_setzero_v2r8();
738             fjy0             = _fjsp_setzero_v2r8();
739             fjz0             = _fjsp_setzero_v2r8();
740             fjx1             = _fjsp_setzero_v2r8();
741             fjy1             = _fjsp_setzero_v2r8();
742             fjz1             = _fjsp_setzero_v2r8();
743             fjx2             = _fjsp_setzero_v2r8();
744             fjy2             = _fjsp_setzero_v2r8();
745             fjz2             = _fjsp_setzero_v2r8();
746             fjx3             = _fjsp_setzero_v2r8();
747             fjy3             = _fjsp_setzero_v2r8();
748             fjz3             = _fjsp_setzero_v2r8();
749
750             /**************************
751              * CALCULATE INTERACTIONS *
752              **************************/
753
754             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
755
756             /* Calculate table index by multiplying r with table scale and truncate to integer */
757             rt               = _fjsp_mul_v2r8(r00,vftabscale);
758             itab_tmp         = _fjsp_dtox_v2r8(rt);
759             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
760             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
761             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
762
763             vfconv.i[0]     *= 8;
764             vfconv.i[1]     *= 8;
765
766             /* CUBIC SPLINE TABLE DISPERSION */
767             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
768             F                = _fjsp_setzero_v2r8();
769             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
770             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
771             H                = _fjsp_setzero_v2r8();
772             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
773             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
774             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
775             vvdw6            = _fjsp_mul_v2r8(c6_00,VV);
776             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
777             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
778
779             /* CUBIC SPLINE TABLE REPULSION */
780             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
781             F                = _fjsp_setzero_v2r8();
782             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
783             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
784             H                = _fjsp_setzero_v2r8();
785             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
786             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
787             VV               = _fjsp_madd_v2r8(vfeps,Fp,Y);
788             vvdw12           = _fjsp_mul_v2r8(c12_00,VV);
789             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
790             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
791             vvdw             = _fjsp_add_v2r8(vvdw12,vvdw6);
792             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
793
794             /* Update potential sum for this i atom from the interaction with this j atom. */
795             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
796             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
797
798             fscal            = fvdw;
799
800             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
801
802             /* Update vectorial force */
803             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
804             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
805             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
806             
807             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
808             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
809             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
810
811             /**************************
812              * CALCULATE INTERACTIONS *
813              **************************/
814
815             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
816             {
817
818             /* REACTION-FIELD ELECTROSTATICS */
819             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
820             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
821
822             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
823
824             /* Update potential sum for this i atom from the interaction with this j atom. */
825             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
826             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
827             velecsum         = _fjsp_add_v2r8(velecsum,velec);
828
829             fscal            = felec;
830
831             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
832
833             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
834
835             /* Update vectorial force */
836             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
837             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
838             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
839             
840             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
841             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
842             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
843
844             }
845
846             /**************************
847              * CALCULATE INTERACTIONS *
848              **************************/
849
850             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
851             {
852
853             /* REACTION-FIELD ELECTROSTATICS */
854             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
855             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
856
857             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
858
859             /* Update potential sum for this i atom from the interaction with this j atom. */
860             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
861             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
862             velecsum         = _fjsp_add_v2r8(velecsum,velec);
863
864             fscal            = felec;
865
866             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
867
868             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
869
870             /* Update vectorial force */
871             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
872             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
873             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
874             
875             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
876             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
877             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
878
879             }
880
881             /**************************
882              * CALCULATE INTERACTIONS *
883              **************************/
884
885             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
886             {
887
888             /* REACTION-FIELD ELECTROSTATICS */
889             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
890             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
891
892             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
893
894             /* Update potential sum for this i atom from the interaction with this j atom. */
895             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
896             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
897             velecsum         = _fjsp_add_v2r8(velecsum,velec);
898
899             fscal            = felec;
900
901             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
902
903             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
904
905             /* Update vectorial force */
906             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
907             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
908             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
909             
910             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
911             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
912             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
913
914             }
915
916             /**************************
917              * CALCULATE INTERACTIONS *
918              **************************/
919
920             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
921             {
922
923             /* REACTION-FIELD ELECTROSTATICS */
924             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
925             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
926
927             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
928
929             /* Update potential sum for this i atom from the interaction with this j atom. */
930             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
931             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
932             velecsum         = _fjsp_add_v2r8(velecsum,velec);
933
934             fscal            = felec;
935
936             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
937
938             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
939
940             /* Update vectorial force */
941             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
942             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
943             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
944             
945             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
946             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
947             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
948
949             }
950
951             /**************************
952              * CALCULATE INTERACTIONS *
953              **************************/
954
955             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
956             {
957
958             /* REACTION-FIELD ELECTROSTATICS */
959             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
960             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
961
962             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
963
964             /* Update potential sum for this i atom from the interaction with this j atom. */
965             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
966             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
967             velecsum         = _fjsp_add_v2r8(velecsum,velec);
968
969             fscal            = felec;
970
971             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
972
973             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
974
975             /* Update vectorial force */
976             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
977             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
978             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
979             
980             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
981             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
982             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
983
984             }
985
986             /**************************
987              * CALCULATE INTERACTIONS *
988              **************************/
989
990             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
991             {
992
993             /* REACTION-FIELD ELECTROSTATICS */
994             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
995             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
996
997             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
998
999             /* Update potential sum for this i atom from the interaction with this j atom. */
1000             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1001             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1002             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1003
1004             fscal            = felec;
1005
1006             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1007
1008             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1009
1010             /* Update vectorial force */
1011             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1012             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1013             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1014             
1015             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1016             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1017             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1018
1019             }
1020
1021             /**************************
1022              * CALCULATE INTERACTIONS *
1023              **************************/
1024
1025             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1026             {
1027
1028             /* REACTION-FIELD ELECTROSTATICS */
1029             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
1030             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1031
1032             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1033
1034             /* Update potential sum for this i atom from the interaction with this j atom. */
1035             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1036             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1037             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1038
1039             fscal            = felec;
1040
1041             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1042
1043             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1044
1045             /* Update vectorial force */
1046             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1047             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1048             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1049             
1050             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1051             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1052             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1053
1054             }
1055
1056             /**************************
1057              * CALCULATE INTERACTIONS *
1058              **************************/
1059
1060             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1061             {
1062
1063             /* REACTION-FIELD ELECTROSTATICS */
1064             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
1065             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1066
1067             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1068
1069             /* Update potential sum for this i atom from the interaction with this j atom. */
1070             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1071             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1072             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1073
1074             fscal            = felec;
1075
1076             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1077
1078             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1079
1080             /* Update vectorial force */
1081             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1082             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1083             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1084             
1085             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1086             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1087             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1088
1089             }
1090
1091             /**************************
1092              * CALCULATE INTERACTIONS *
1093              **************************/
1094
1095             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1096             {
1097
1098             /* REACTION-FIELD ELECTROSTATICS */
1099             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
1100             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1101
1102             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1103
1104             /* Update potential sum for this i atom from the interaction with this j atom. */
1105             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1106             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1107             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1108
1109             fscal            = felec;
1110
1111             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1112
1113             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1114
1115             /* Update vectorial force */
1116             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1117             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1118             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1119             
1120             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1121             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1122             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1123
1124             }
1125
1126             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1127
1128             /* Inner loop uses 413 flops */
1129         }
1130
1131         /* End of innermost loop */
1132
1133         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1134                                               f+i_coord_offset,fshift+i_shift_offset);
1135
1136         ggid                        = gid[iidx];
1137         /* Update potential energies */
1138         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1139         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1140
1141         /* Increment number of inner iterations */
1142         inneriter                  += j_index_end - j_index_start;
1143
1144         /* Outer loop uses 26 flops */
1145     }
1146
1147     /* Increment number of outer iterations */
1148     outeriter        += nri;
1149
1150     /* Update outer/inner flops */
1151
1152     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*413);
1153 }
1154 /*
1155  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
1156  * Electrostatics interaction: ReactionField
1157  * VdW interaction:            CubicSplineTable
1158  * Geometry:                   Water4-Water4
1159  * Calculate force/pot:        Force
1160  */
1161 void
1162 nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
1163                     (t_nblist                    * gmx_restrict       nlist,
1164                      rvec                        * gmx_restrict          xx,
1165                      rvec                        * gmx_restrict          ff,
1166                      t_forcerec                  * gmx_restrict          fr,
1167                      t_mdatoms                   * gmx_restrict     mdatoms,
1168                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1169                      t_nrnb                      * gmx_restrict        nrnb)
1170 {
1171     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1172      * just 0 for non-waters.
1173      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1174      * jnr indices corresponding to data put in the two positions in the SIMD register.
1175      */
1176     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1177     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1178     int              jnrA,jnrB;
1179     int              j_coord_offsetA,j_coord_offsetB;
1180     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1181     real             rcutoff_scalar;
1182     real             *shiftvec,*fshift,*x,*f;
1183     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1184     int              vdwioffset0;
1185     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1186     int              vdwioffset1;
1187     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1188     int              vdwioffset2;
1189     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1190     int              vdwioffset3;
1191     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1192     int              vdwjidx0A,vdwjidx0B;
1193     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1194     int              vdwjidx1A,vdwjidx1B;
1195     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1196     int              vdwjidx2A,vdwjidx2B;
1197     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1198     int              vdwjidx3A,vdwjidx3B;
1199     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1200     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1201     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1202     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1203     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1204     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1205     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1206     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1207     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1208     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1209     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1210     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1211     real             *charge;
1212     int              nvdwtype;
1213     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1214     int              *vdwtype;
1215     real             *vdwparam;
1216     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1217     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1218     _fjsp_v2r8       rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1219     real             *vftab;
1220     _fjsp_v2r8       itab_tmp;
1221     _fjsp_v2r8       dummy_mask,cutoff_mask;
1222     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1223     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1224     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1225
1226     x                = xx[0];
1227     f                = ff[0];
1228
1229     nri              = nlist->nri;
1230     iinr             = nlist->iinr;
1231     jindex           = nlist->jindex;
1232     jjnr             = nlist->jjnr;
1233     shiftidx         = nlist->shift;
1234     gid              = nlist->gid;
1235     shiftvec         = fr->shift_vec[0];
1236     fshift           = fr->fshift[0];
1237     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1238     charge           = mdatoms->chargeA;
1239     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1240     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1241     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1242     nvdwtype         = fr->ntype;
1243     vdwparam         = fr->nbfp;
1244     vdwtype          = mdatoms->typeA;
1245
1246     vftab            = kernel_data->table_vdw->data;
1247     vftabscale       = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
1248
1249     /* Setup water-specific parameters */
1250     inr              = nlist->iinr[0];
1251     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1252     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1253     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1254     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1255
1256     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1257     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1258     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
1259     vdwjidx0A        = 2*vdwtype[inr+0];
1260     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1261     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1262     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1263     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1264     qq13             = _fjsp_mul_v2r8(iq1,jq3);
1265     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1266     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1267     qq23             = _fjsp_mul_v2r8(iq2,jq3);
1268     qq31             = _fjsp_mul_v2r8(iq3,jq1);
1269     qq32             = _fjsp_mul_v2r8(iq3,jq2);
1270     qq33             = _fjsp_mul_v2r8(iq3,jq3);
1271
1272     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1273     rcutoff_scalar   = fr->rcoulomb;
1274     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1275     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
1276
1277     /* Avoid stupid compiler warnings */
1278     jnrA = jnrB = 0;
1279     j_coord_offsetA = 0;
1280     j_coord_offsetB = 0;
1281
1282     outeriter        = 0;
1283     inneriter        = 0;
1284
1285     /* Start outer loop over neighborlists */
1286     for(iidx=0; iidx<nri; iidx++)
1287     {
1288         /* Load shift vector for this list */
1289         i_shift_offset   = DIM*shiftidx[iidx];
1290
1291         /* Load limits for loop over neighbors */
1292         j_index_start    = jindex[iidx];
1293         j_index_end      = jindex[iidx+1];
1294
1295         /* Get outer coordinate index */
1296         inr              = iinr[iidx];
1297         i_coord_offset   = DIM*inr;
1298
1299         /* Load i particle coords and add shift vector */
1300         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1301                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1302
1303         fix0             = _fjsp_setzero_v2r8();
1304         fiy0             = _fjsp_setzero_v2r8();
1305         fiz0             = _fjsp_setzero_v2r8();
1306         fix1             = _fjsp_setzero_v2r8();
1307         fiy1             = _fjsp_setzero_v2r8();
1308         fiz1             = _fjsp_setzero_v2r8();
1309         fix2             = _fjsp_setzero_v2r8();
1310         fiy2             = _fjsp_setzero_v2r8();
1311         fiz2             = _fjsp_setzero_v2r8();
1312         fix3             = _fjsp_setzero_v2r8();
1313         fiy3             = _fjsp_setzero_v2r8();
1314         fiz3             = _fjsp_setzero_v2r8();
1315
1316         /* Start inner kernel loop */
1317         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1318         {
1319
1320             /* Get j neighbor index, and coordinate index */
1321             jnrA             = jjnr[jidx];
1322             jnrB             = jjnr[jidx+1];
1323             j_coord_offsetA  = DIM*jnrA;
1324             j_coord_offsetB  = DIM*jnrB;
1325
1326             /* load j atom coordinates */
1327             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1328                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1329                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1330
1331             /* Calculate displacement vector */
1332             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1333             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1334             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1335             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1336             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1337             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1338             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1339             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1340             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1341             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1342             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1343             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1344             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1345             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1346             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1347             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1348             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1349             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1350             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1351             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1352             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1353             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1354             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1355             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1356             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1357             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1358             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1359             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1360             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1361             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1362
1363             /* Calculate squared distance and things based on it */
1364             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1365             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1366             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1367             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1368             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1369             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1370             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1371             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1372             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1373             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1374
1375             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1376             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1377             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1378             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1379             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1380             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1381             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1382             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1383             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1384             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1385
1386             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1387             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1388             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1389             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1390             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1391             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1392             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1393             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1394             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1395
1396             fjx0             = _fjsp_setzero_v2r8();
1397             fjy0             = _fjsp_setzero_v2r8();
1398             fjz0             = _fjsp_setzero_v2r8();
1399             fjx1             = _fjsp_setzero_v2r8();
1400             fjy1             = _fjsp_setzero_v2r8();
1401             fjz1             = _fjsp_setzero_v2r8();
1402             fjx2             = _fjsp_setzero_v2r8();
1403             fjy2             = _fjsp_setzero_v2r8();
1404             fjz2             = _fjsp_setzero_v2r8();
1405             fjx3             = _fjsp_setzero_v2r8();
1406             fjy3             = _fjsp_setzero_v2r8();
1407             fjz3             = _fjsp_setzero_v2r8();
1408
1409             /**************************
1410              * CALCULATE INTERACTIONS *
1411              **************************/
1412
1413             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1414
1415             /* Calculate table index by multiplying r with table scale and truncate to integer */
1416             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1417             itab_tmp         = _fjsp_dtox_v2r8(rt);
1418             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1419             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1420             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1421
1422             vfconv.i[0]     *= 8;
1423             vfconv.i[1]     *= 8;
1424
1425             /* CUBIC SPLINE TABLE DISPERSION */
1426             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1427             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1428             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1429             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1430             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1431             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1432             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1433             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1434             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1435
1436             /* CUBIC SPLINE TABLE REPULSION */
1437             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1438             F                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1439             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1440             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1441             H                = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1442             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1443             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1444             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1445             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1446             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1447
1448             fscal            = fvdw;
1449
1450             /* Update vectorial force */
1451             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1452             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1453             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1454             
1455             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1456             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1457             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1458
1459             /**************************
1460              * CALCULATE INTERACTIONS *
1461              **************************/
1462
1463             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1464             {
1465
1466             /* REACTION-FIELD ELECTROSTATICS */
1467             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1468
1469             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1470
1471             fscal            = felec;
1472
1473             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1474
1475             /* Update vectorial force */
1476             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1477             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1478             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1479             
1480             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1481             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1482             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1483
1484             }
1485
1486             /**************************
1487              * CALCULATE INTERACTIONS *
1488              **************************/
1489
1490             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1491             {
1492
1493             /* REACTION-FIELD ELECTROSTATICS */
1494             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1495
1496             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1497
1498             fscal            = felec;
1499
1500             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1501
1502             /* Update vectorial force */
1503             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1504             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1505             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1506             
1507             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1508             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1509             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1510
1511             }
1512
1513             /**************************
1514              * CALCULATE INTERACTIONS *
1515              **************************/
1516
1517             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1518             {
1519
1520             /* REACTION-FIELD ELECTROSTATICS */
1521             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1522
1523             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1524
1525             fscal            = felec;
1526
1527             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1528
1529             /* Update vectorial force */
1530             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1531             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1532             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1533             
1534             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1535             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1536             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1537
1538             }
1539
1540             /**************************
1541              * CALCULATE INTERACTIONS *
1542              **************************/
1543
1544             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1545             {
1546
1547             /* REACTION-FIELD ELECTROSTATICS */
1548             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1549
1550             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1551
1552             fscal            = felec;
1553
1554             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1555
1556             /* Update vectorial force */
1557             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1558             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1559             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1560             
1561             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1562             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1563             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1564
1565             }
1566
1567             /**************************
1568              * CALCULATE INTERACTIONS *
1569              **************************/
1570
1571             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1572             {
1573
1574             /* REACTION-FIELD ELECTROSTATICS */
1575             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1576
1577             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1578
1579             fscal            = felec;
1580
1581             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1582
1583             /* Update vectorial force */
1584             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1585             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1586             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1587             
1588             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1589             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1590             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1591
1592             }
1593
1594             /**************************
1595              * CALCULATE INTERACTIONS *
1596              **************************/
1597
1598             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1599             {
1600
1601             /* REACTION-FIELD ELECTROSTATICS */
1602             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1603
1604             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1605
1606             fscal            = felec;
1607
1608             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1609
1610             /* Update vectorial force */
1611             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1612             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1613             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1614             
1615             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1616             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1617             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1618
1619             }
1620
1621             /**************************
1622              * CALCULATE INTERACTIONS *
1623              **************************/
1624
1625             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1626             {
1627
1628             /* REACTION-FIELD ELECTROSTATICS */
1629             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1630
1631             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1632
1633             fscal            = felec;
1634
1635             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1636
1637             /* Update vectorial force */
1638             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1639             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1640             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1641             
1642             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1643             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1644             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1645
1646             }
1647
1648             /**************************
1649              * CALCULATE INTERACTIONS *
1650              **************************/
1651
1652             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1653             {
1654
1655             /* REACTION-FIELD ELECTROSTATICS */
1656             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1657
1658             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1659
1660             fscal            = felec;
1661
1662             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1663
1664             /* Update vectorial force */
1665             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1666             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1667             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1668             
1669             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1670             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1671             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1672
1673             }
1674
1675             /**************************
1676              * CALCULATE INTERACTIONS *
1677              **************************/
1678
1679             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1680             {
1681
1682             /* REACTION-FIELD ELECTROSTATICS */
1683             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1684
1685             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1686
1687             fscal            = felec;
1688
1689             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1690
1691             /* Update vectorial force */
1692             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1693             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1694             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1695             
1696             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1697             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1698             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1699
1700             }
1701
1702             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1703
1704             /* Inner loop uses 351 flops */
1705         }
1706
1707         if(jidx<j_index_end)
1708         {
1709
1710             jnrA             = jjnr[jidx];
1711             j_coord_offsetA  = DIM*jnrA;
1712
1713             /* load j atom coordinates */
1714             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1715                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1716                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1717
1718             /* Calculate displacement vector */
1719             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1720             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1721             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1722             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1723             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1724             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1725             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1726             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1727             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1728             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1729             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1730             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1731             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1732             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1733             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1734             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1735             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1736             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1737             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1738             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1739             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1740             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1741             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1742             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1743             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1744             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1745             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1746             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1747             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1748             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1749
1750             /* Calculate squared distance and things based on it */
1751             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1752             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1753             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1754             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1755             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1756             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1757             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1758             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1759             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1760             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1761
1762             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1763             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1764             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1765             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1766             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1767             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1768             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1769             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1770             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1771             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1772
1773             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1774             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1775             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1776             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1777             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1778             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1779             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1780             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1781             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1782
1783             fjx0             = _fjsp_setzero_v2r8();
1784             fjy0             = _fjsp_setzero_v2r8();
1785             fjz0             = _fjsp_setzero_v2r8();
1786             fjx1             = _fjsp_setzero_v2r8();
1787             fjy1             = _fjsp_setzero_v2r8();
1788             fjz1             = _fjsp_setzero_v2r8();
1789             fjx2             = _fjsp_setzero_v2r8();
1790             fjy2             = _fjsp_setzero_v2r8();
1791             fjz2             = _fjsp_setzero_v2r8();
1792             fjx3             = _fjsp_setzero_v2r8();
1793             fjy3             = _fjsp_setzero_v2r8();
1794             fjz3             = _fjsp_setzero_v2r8();
1795
1796             /**************************
1797              * CALCULATE INTERACTIONS *
1798              **************************/
1799
1800             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1801
1802             /* Calculate table index by multiplying r with table scale and truncate to integer */
1803             rt               = _fjsp_mul_v2r8(r00,vftabscale);
1804             itab_tmp         = _fjsp_dtox_v2r8(rt);
1805             vfeps            = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1806             twovfeps         = _fjsp_add_v2r8(vfeps,vfeps);
1807             _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1808
1809             vfconv.i[0]     *= 8;
1810             vfconv.i[1]     *= 8;
1811
1812             /* CUBIC SPLINE TABLE DISPERSION */
1813             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1814             F                = _fjsp_setzero_v2r8();
1815             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1816             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1817             H                = _fjsp_setzero_v2r8();
1818             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1819             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1820             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1821             fvdw6            = _fjsp_mul_v2r8(c6_00,FF);
1822
1823             /* CUBIC SPLINE TABLE REPULSION */
1824             Y                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1825             F                = _fjsp_setzero_v2r8();
1826             GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1827             G                = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1828             H                = _fjsp_setzero_v2r8();
1829             GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1830             Fp               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1831             FF               = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1832             fvdw12           = _fjsp_mul_v2r8(c12_00,FF);
1833             fvdw             = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1834
1835             fscal            = fvdw;
1836
1837             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1838
1839             /* Update vectorial force */
1840             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1841             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1842             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1843             
1844             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1845             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1846             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1847
1848             /**************************
1849              * CALCULATE INTERACTIONS *
1850              **************************/
1851
1852             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1853             {
1854
1855             /* REACTION-FIELD ELECTROSTATICS */
1856             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1857
1858             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1859
1860             fscal            = felec;
1861
1862             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1863
1864             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1865
1866             /* Update vectorial force */
1867             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1868             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1869             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1870             
1871             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1872             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1873             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1874
1875             }
1876
1877             /**************************
1878              * CALCULATE INTERACTIONS *
1879              **************************/
1880
1881             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1882             {
1883
1884             /* REACTION-FIELD ELECTROSTATICS */
1885             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1886
1887             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1888
1889             fscal            = felec;
1890
1891             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1892
1893             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1894
1895             /* Update vectorial force */
1896             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1897             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1898             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1899             
1900             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1901             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1902             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1903
1904             }
1905
1906             /**************************
1907              * CALCULATE INTERACTIONS *
1908              **************************/
1909
1910             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1911             {
1912
1913             /* REACTION-FIELD ELECTROSTATICS */
1914             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1915
1916             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1917
1918             fscal            = felec;
1919
1920             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1921
1922             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1923
1924             /* Update vectorial force */
1925             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1926             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1927             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1928             
1929             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1930             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1931             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1932
1933             }
1934
1935             /**************************
1936              * CALCULATE INTERACTIONS *
1937              **************************/
1938
1939             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1940             {
1941
1942             /* REACTION-FIELD ELECTROSTATICS */
1943             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1944
1945             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1946
1947             fscal            = felec;
1948
1949             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1950
1951             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1952
1953             /* Update vectorial force */
1954             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1955             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1956             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1957             
1958             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1959             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1960             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1961
1962             }
1963
1964             /**************************
1965              * CALCULATE INTERACTIONS *
1966              **************************/
1967
1968             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1969             {
1970
1971             /* REACTION-FIELD ELECTROSTATICS */
1972             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1973
1974             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1975
1976             fscal            = felec;
1977
1978             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1979
1980             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1981
1982             /* Update vectorial force */
1983             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1984             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1985             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1986             
1987             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1988             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1989             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1990
1991             }
1992
1993             /**************************
1994              * CALCULATE INTERACTIONS *
1995              **************************/
1996
1997             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1998             {
1999
2000             /* REACTION-FIELD ELECTROSTATICS */
2001             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
2002
2003             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
2004
2005             fscal            = felec;
2006
2007             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2008
2009             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2010
2011             /* Update vectorial force */
2012             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
2013             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
2014             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
2015             
2016             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
2017             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
2018             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
2019
2020             }
2021
2022             /**************************
2023              * CALCULATE INTERACTIONS *
2024              **************************/
2025
2026             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
2027             {
2028
2029             /* REACTION-FIELD ELECTROSTATICS */
2030             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
2031
2032             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
2033
2034             fscal            = felec;
2035
2036             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2037
2038             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2039
2040             /* Update vectorial force */
2041             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
2042             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
2043             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
2044             
2045             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
2046             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
2047             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
2048
2049             }
2050
2051             /**************************
2052              * CALCULATE INTERACTIONS *
2053              **************************/
2054
2055             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
2056             {
2057
2058             /* REACTION-FIELD ELECTROSTATICS */
2059             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
2060
2061             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
2062
2063             fscal            = felec;
2064
2065             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2066
2067             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2068
2069             /* Update vectorial force */
2070             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
2071             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
2072             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
2073             
2074             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
2075             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
2076             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
2077
2078             }
2079
2080             /**************************
2081              * CALCULATE INTERACTIONS *
2082              **************************/
2083
2084             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
2085             {
2086
2087             /* REACTION-FIELD ELECTROSTATICS */
2088             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
2089
2090             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
2091
2092             fscal            = felec;
2093
2094             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2095
2096             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2097
2098             /* Update vectorial force */
2099             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
2100             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
2101             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
2102             
2103             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
2104             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
2105             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
2106
2107             }
2108
2109             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2110
2111             /* Inner loop uses 351 flops */
2112         }
2113
2114         /* End of innermost loop */
2115
2116         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2117                                               f+i_coord_offset,fshift+i_shift_offset);
2118
2119         /* Increment number of inner iterations */
2120         inneriter                  += j_index_end - j_index_start;
2121
2122         /* Outer loop uses 24 flops */
2123     }
2124
2125     /* Increment number of outer iterations */
2126     outeriter        += nri;
2127
2128     /* Update outer/inner flops */
2129
2130     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*351);
2131 }