da712725eba131d7843c1ddd064ebddb313e5fd5
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "vec.h"
47 #include "nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: ReactionField (with cutoff)
54  * VdW interaction:            LennardJones (with potential switch)
55  * Geometry:                   Water4-Water4
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, i.e. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwioffset3;
88     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
89     int              vdwjidx0A,vdwjidx0B;
90     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
91     int              vdwjidx1A,vdwjidx1B;
92     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
93     int              vdwjidx2A,vdwjidx2B;
94     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
95     int              vdwjidx3A,vdwjidx3B;
96     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
97     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
98     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
99     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
100     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
101     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
102     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
103     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
104     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
105     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
106     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
107     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
108     real             *charge;
109     int              nvdwtype;
110     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
111     int              *vdwtype;
112     real             *vdwparam;
113     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
114     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
115     _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
116     real             rswitch_scalar,d_scalar;
117     _fjsp_v2r8       itab_tmp;
118     _fjsp_v2r8       dummy_mask,cutoff_mask;
119     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
120     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
121     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
122
123     x                = xx[0];
124     f                = ff[0];
125
126     nri              = nlist->nri;
127     iinr             = nlist->iinr;
128     jindex           = nlist->jindex;
129     jjnr             = nlist->jjnr;
130     shiftidx         = nlist->shift;
131     gid              = nlist->gid;
132     shiftvec         = fr->shift_vec[0];
133     fshift           = fr->fshift[0];
134     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
135     charge           = mdatoms->chargeA;
136     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
137     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
138     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
139     nvdwtype         = fr->ntype;
140     vdwparam         = fr->nbfp;
141     vdwtype          = mdatoms->typeA;
142
143     /* Setup water-specific parameters */
144     inr              = nlist->iinr[0];
145     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
146     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
147     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
148     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
149
150     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
151     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
152     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
153     vdwjidx0A        = 2*vdwtype[inr+0];
154     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
155     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
156     qq11             = _fjsp_mul_v2r8(iq1,jq1);
157     qq12             = _fjsp_mul_v2r8(iq1,jq2);
158     qq13             = _fjsp_mul_v2r8(iq1,jq3);
159     qq21             = _fjsp_mul_v2r8(iq2,jq1);
160     qq22             = _fjsp_mul_v2r8(iq2,jq2);
161     qq23             = _fjsp_mul_v2r8(iq2,jq3);
162     qq31             = _fjsp_mul_v2r8(iq3,jq1);
163     qq32             = _fjsp_mul_v2r8(iq3,jq2);
164     qq33             = _fjsp_mul_v2r8(iq3,jq3);
165
166     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
167     rcutoff_scalar   = fr->rcoulomb;
168     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
169     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
170
171     rswitch_scalar   = fr->rvdw_switch;
172     rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
173     /* Setup switch parameters */
174     d_scalar         = rcutoff_scalar-rswitch_scalar;
175     d                = gmx_fjsp_set1_v2r8(d_scalar);
176     swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
177     swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
178     swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
179     swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
180     swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
181     swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
182
183     /* Avoid stupid compiler warnings */
184     jnrA = jnrB = 0;
185     j_coord_offsetA = 0;
186     j_coord_offsetB = 0;
187
188     outeriter        = 0;
189     inneriter        = 0;
190
191     /* Start outer loop over neighborlists */
192     for(iidx=0; iidx<nri; iidx++)
193     {
194         /* Load shift vector for this list */
195         i_shift_offset   = DIM*shiftidx[iidx];
196
197         /* Load limits for loop over neighbors */
198         j_index_start    = jindex[iidx];
199         j_index_end      = jindex[iidx+1];
200
201         /* Get outer coordinate index */
202         inr              = iinr[iidx];
203         i_coord_offset   = DIM*inr;
204
205         /* Load i particle coords and add shift vector */
206         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
207                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
208
209         fix0             = _fjsp_setzero_v2r8();
210         fiy0             = _fjsp_setzero_v2r8();
211         fiz0             = _fjsp_setzero_v2r8();
212         fix1             = _fjsp_setzero_v2r8();
213         fiy1             = _fjsp_setzero_v2r8();
214         fiz1             = _fjsp_setzero_v2r8();
215         fix2             = _fjsp_setzero_v2r8();
216         fiy2             = _fjsp_setzero_v2r8();
217         fiz2             = _fjsp_setzero_v2r8();
218         fix3             = _fjsp_setzero_v2r8();
219         fiy3             = _fjsp_setzero_v2r8();
220         fiz3             = _fjsp_setzero_v2r8();
221
222         /* Reset potential sums */
223         velecsum         = _fjsp_setzero_v2r8();
224         vvdwsum          = _fjsp_setzero_v2r8();
225
226         /* Start inner kernel loop */
227         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
228         {
229
230             /* Get j neighbor index, and coordinate index */
231             jnrA             = jjnr[jidx];
232             jnrB             = jjnr[jidx+1];
233             j_coord_offsetA  = DIM*jnrA;
234             j_coord_offsetB  = DIM*jnrB;
235
236             /* load j atom coordinates */
237             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
238                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
239                                               &jy2,&jz2,&jx3,&jy3,&jz3);
240
241             /* Calculate displacement vector */
242             dx00             = _fjsp_sub_v2r8(ix0,jx0);
243             dy00             = _fjsp_sub_v2r8(iy0,jy0);
244             dz00             = _fjsp_sub_v2r8(iz0,jz0);
245             dx11             = _fjsp_sub_v2r8(ix1,jx1);
246             dy11             = _fjsp_sub_v2r8(iy1,jy1);
247             dz11             = _fjsp_sub_v2r8(iz1,jz1);
248             dx12             = _fjsp_sub_v2r8(ix1,jx2);
249             dy12             = _fjsp_sub_v2r8(iy1,jy2);
250             dz12             = _fjsp_sub_v2r8(iz1,jz2);
251             dx13             = _fjsp_sub_v2r8(ix1,jx3);
252             dy13             = _fjsp_sub_v2r8(iy1,jy3);
253             dz13             = _fjsp_sub_v2r8(iz1,jz3);
254             dx21             = _fjsp_sub_v2r8(ix2,jx1);
255             dy21             = _fjsp_sub_v2r8(iy2,jy1);
256             dz21             = _fjsp_sub_v2r8(iz2,jz1);
257             dx22             = _fjsp_sub_v2r8(ix2,jx2);
258             dy22             = _fjsp_sub_v2r8(iy2,jy2);
259             dz22             = _fjsp_sub_v2r8(iz2,jz2);
260             dx23             = _fjsp_sub_v2r8(ix2,jx3);
261             dy23             = _fjsp_sub_v2r8(iy2,jy3);
262             dz23             = _fjsp_sub_v2r8(iz2,jz3);
263             dx31             = _fjsp_sub_v2r8(ix3,jx1);
264             dy31             = _fjsp_sub_v2r8(iy3,jy1);
265             dz31             = _fjsp_sub_v2r8(iz3,jz1);
266             dx32             = _fjsp_sub_v2r8(ix3,jx2);
267             dy32             = _fjsp_sub_v2r8(iy3,jy2);
268             dz32             = _fjsp_sub_v2r8(iz3,jz2);
269             dx33             = _fjsp_sub_v2r8(ix3,jx3);
270             dy33             = _fjsp_sub_v2r8(iy3,jy3);
271             dz33             = _fjsp_sub_v2r8(iz3,jz3);
272
273             /* Calculate squared distance and things based on it */
274             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
275             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
276             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
277             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
278             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
279             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
280             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
281             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
282             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
283             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
284
285             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
286             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
287             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
288             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
289             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
290             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
291             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
292             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
293             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
294             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
295
296             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
297             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
298             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
299             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
300             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
301             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
302             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
303             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
304             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
305             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
306
307             fjx0             = _fjsp_setzero_v2r8();
308             fjy0             = _fjsp_setzero_v2r8();
309             fjz0             = _fjsp_setzero_v2r8();
310             fjx1             = _fjsp_setzero_v2r8();
311             fjy1             = _fjsp_setzero_v2r8();
312             fjz1             = _fjsp_setzero_v2r8();
313             fjx2             = _fjsp_setzero_v2r8();
314             fjy2             = _fjsp_setzero_v2r8();
315             fjz2             = _fjsp_setzero_v2r8();
316             fjx3             = _fjsp_setzero_v2r8();
317             fjy3             = _fjsp_setzero_v2r8();
318             fjz3             = _fjsp_setzero_v2r8();
319
320             /**************************
321              * CALCULATE INTERACTIONS *
322              **************************/
323
324             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
325             {
326
327             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
328
329             /* LENNARD-JONES DISPERSION/REPULSION */
330
331             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
332             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
333             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
334             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
335             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
336
337             d                = _fjsp_sub_v2r8(r00,rswitch);
338             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
339             d2               = _fjsp_mul_v2r8(d,d);
340             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
341
342             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
343
344             /* Evaluate switch function */
345             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
346             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
347             vvdw             = _fjsp_mul_v2r8(vvdw,sw);
348             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
349
350             /* Update potential sum for this i atom from the interaction with this j atom. */
351             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
352             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
353
354             fscal            = fvdw;
355
356             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
357
358             /* Update vectorial force */
359             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
360             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
361             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
362             
363             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
364             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
365             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
366
367             }
368
369             /**************************
370              * CALCULATE INTERACTIONS *
371              **************************/
372
373             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
374             {
375
376             /* REACTION-FIELD ELECTROSTATICS */
377             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
378             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
379
380             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
381
382             /* Update potential sum for this i atom from the interaction with this j atom. */
383             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
384             velecsum         = _fjsp_add_v2r8(velecsum,velec);
385
386             fscal            = felec;
387
388             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
389
390             /* Update vectorial force */
391             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
392             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
393             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
394             
395             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
396             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
397             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
398
399             }
400
401             /**************************
402              * CALCULATE INTERACTIONS *
403              **************************/
404
405             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
406             {
407
408             /* REACTION-FIELD ELECTROSTATICS */
409             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
410             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
411
412             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
413
414             /* Update potential sum for this i atom from the interaction with this j atom. */
415             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
416             velecsum         = _fjsp_add_v2r8(velecsum,velec);
417
418             fscal            = felec;
419
420             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
421
422             /* Update vectorial force */
423             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
424             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
425             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
426             
427             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
428             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
429             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
430
431             }
432
433             /**************************
434              * CALCULATE INTERACTIONS *
435              **************************/
436
437             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
438             {
439
440             /* REACTION-FIELD ELECTROSTATICS */
441             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
442             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
443
444             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
445
446             /* Update potential sum for this i atom from the interaction with this j atom. */
447             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
448             velecsum         = _fjsp_add_v2r8(velecsum,velec);
449
450             fscal            = felec;
451
452             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
453
454             /* Update vectorial force */
455             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
456             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
457             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
458             
459             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
460             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
461             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
462
463             }
464
465             /**************************
466              * CALCULATE INTERACTIONS *
467              **************************/
468
469             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
470             {
471
472             /* REACTION-FIELD ELECTROSTATICS */
473             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
474             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
475
476             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
477
478             /* Update potential sum for this i atom from the interaction with this j atom. */
479             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
480             velecsum         = _fjsp_add_v2r8(velecsum,velec);
481
482             fscal            = felec;
483
484             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
485
486             /* Update vectorial force */
487             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
488             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
489             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
490             
491             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
492             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
493             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
494
495             }
496
497             /**************************
498              * CALCULATE INTERACTIONS *
499              **************************/
500
501             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
502             {
503
504             /* REACTION-FIELD ELECTROSTATICS */
505             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
506             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
507
508             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
509
510             /* Update potential sum for this i atom from the interaction with this j atom. */
511             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
512             velecsum         = _fjsp_add_v2r8(velecsum,velec);
513
514             fscal            = felec;
515
516             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
517
518             /* Update vectorial force */
519             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
520             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
521             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
522             
523             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
524             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
525             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
526
527             }
528
529             /**************************
530              * CALCULATE INTERACTIONS *
531              **************************/
532
533             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
534             {
535
536             /* REACTION-FIELD ELECTROSTATICS */
537             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
538             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
539
540             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
541
542             /* Update potential sum for this i atom from the interaction with this j atom. */
543             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
544             velecsum         = _fjsp_add_v2r8(velecsum,velec);
545
546             fscal            = felec;
547
548             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
549
550             /* Update vectorial force */
551             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
552             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
553             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
554             
555             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
556             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
557             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
558
559             }
560
561             /**************************
562              * CALCULATE INTERACTIONS *
563              **************************/
564
565             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
566             {
567
568             /* REACTION-FIELD ELECTROSTATICS */
569             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
570             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
571
572             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
573
574             /* Update potential sum for this i atom from the interaction with this j atom. */
575             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
576             velecsum         = _fjsp_add_v2r8(velecsum,velec);
577
578             fscal            = felec;
579
580             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
581
582             /* Update vectorial force */
583             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
584             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
585             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
586             
587             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
588             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
589             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
590
591             }
592
593             /**************************
594              * CALCULATE INTERACTIONS *
595              **************************/
596
597             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
598             {
599
600             /* REACTION-FIELD ELECTROSTATICS */
601             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
602             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
603
604             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
605
606             /* Update potential sum for this i atom from the interaction with this j atom. */
607             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
608             velecsum         = _fjsp_add_v2r8(velecsum,velec);
609
610             fscal            = felec;
611
612             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
613
614             /* Update vectorial force */
615             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
616             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
617             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
618             
619             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
620             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
621             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
622
623             }
624
625             /**************************
626              * CALCULATE INTERACTIONS *
627              **************************/
628
629             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
630             {
631
632             /* REACTION-FIELD ELECTROSTATICS */
633             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
634             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
635
636             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
637
638             /* Update potential sum for this i atom from the interaction with this j atom. */
639             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
640             velecsum         = _fjsp_add_v2r8(velecsum,velec);
641
642             fscal            = felec;
643
644             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
645
646             /* Update vectorial force */
647             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
648             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
649             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
650             
651             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
652             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
653             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
654
655             }
656
657             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
658
659             /* Inner loop uses 416 flops */
660         }
661
662         if(jidx<j_index_end)
663         {
664
665             jnrA             = jjnr[jidx];
666             j_coord_offsetA  = DIM*jnrA;
667
668             /* load j atom coordinates */
669             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
670                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
671                                               &jy2,&jz2,&jx3,&jy3,&jz3);
672
673             /* Calculate displacement vector */
674             dx00             = _fjsp_sub_v2r8(ix0,jx0);
675             dy00             = _fjsp_sub_v2r8(iy0,jy0);
676             dz00             = _fjsp_sub_v2r8(iz0,jz0);
677             dx11             = _fjsp_sub_v2r8(ix1,jx1);
678             dy11             = _fjsp_sub_v2r8(iy1,jy1);
679             dz11             = _fjsp_sub_v2r8(iz1,jz1);
680             dx12             = _fjsp_sub_v2r8(ix1,jx2);
681             dy12             = _fjsp_sub_v2r8(iy1,jy2);
682             dz12             = _fjsp_sub_v2r8(iz1,jz2);
683             dx13             = _fjsp_sub_v2r8(ix1,jx3);
684             dy13             = _fjsp_sub_v2r8(iy1,jy3);
685             dz13             = _fjsp_sub_v2r8(iz1,jz3);
686             dx21             = _fjsp_sub_v2r8(ix2,jx1);
687             dy21             = _fjsp_sub_v2r8(iy2,jy1);
688             dz21             = _fjsp_sub_v2r8(iz2,jz1);
689             dx22             = _fjsp_sub_v2r8(ix2,jx2);
690             dy22             = _fjsp_sub_v2r8(iy2,jy2);
691             dz22             = _fjsp_sub_v2r8(iz2,jz2);
692             dx23             = _fjsp_sub_v2r8(ix2,jx3);
693             dy23             = _fjsp_sub_v2r8(iy2,jy3);
694             dz23             = _fjsp_sub_v2r8(iz2,jz3);
695             dx31             = _fjsp_sub_v2r8(ix3,jx1);
696             dy31             = _fjsp_sub_v2r8(iy3,jy1);
697             dz31             = _fjsp_sub_v2r8(iz3,jz1);
698             dx32             = _fjsp_sub_v2r8(ix3,jx2);
699             dy32             = _fjsp_sub_v2r8(iy3,jy2);
700             dz32             = _fjsp_sub_v2r8(iz3,jz2);
701             dx33             = _fjsp_sub_v2r8(ix3,jx3);
702             dy33             = _fjsp_sub_v2r8(iy3,jy3);
703             dz33             = _fjsp_sub_v2r8(iz3,jz3);
704
705             /* Calculate squared distance and things based on it */
706             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
707             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
708             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
709             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
710             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
711             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
712             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
713             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
714             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
715             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
716
717             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
718             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
719             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
720             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
721             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
722             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
723             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
724             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
725             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
726             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
727
728             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
729             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
730             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
731             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
732             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
733             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
734             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
735             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
736             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
737             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
738
739             fjx0             = _fjsp_setzero_v2r8();
740             fjy0             = _fjsp_setzero_v2r8();
741             fjz0             = _fjsp_setzero_v2r8();
742             fjx1             = _fjsp_setzero_v2r8();
743             fjy1             = _fjsp_setzero_v2r8();
744             fjz1             = _fjsp_setzero_v2r8();
745             fjx2             = _fjsp_setzero_v2r8();
746             fjy2             = _fjsp_setzero_v2r8();
747             fjz2             = _fjsp_setzero_v2r8();
748             fjx3             = _fjsp_setzero_v2r8();
749             fjy3             = _fjsp_setzero_v2r8();
750             fjz3             = _fjsp_setzero_v2r8();
751
752             /**************************
753              * CALCULATE INTERACTIONS *
754              **************************/
755
756             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
757             {
758
759             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
760
761             /* LENNARD-JONES DISPERSION/REPULSION */
762
763             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
764             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
765             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
766             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
767             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
768
769             d                = _fjsp_sub_v2r8(r00,rswitch);
770             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
771             d2               = _fjsp_mul_v2r8(d,d);
772             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
773
774             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
775
776             /* Evaluate switch function */
777             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
778             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
779             vvdw             = _fjsp_mul_v2r8(vvdw,sw);
780             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
781
782             /* Update potential sum for this i atom from the interaction with this j atom. */
783             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
784             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
785             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
786
787             fscal            = fvdw;
788
789             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
790
791             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
792
793             /* Update vectorial force */
794             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
795             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
796             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
797             
798             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
799             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
800             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
801
802             }
803
804             /**************************
805              * CALCULATE INTERACTIONS *
806              **************************/
807
808             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
809             {
810
811             /* REACTION-FIELD ELECTROSTATICS */
812             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
813             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
814
815             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
816
817             /* Update potential sum for this i atom from the interaction with this j atom. */
818             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
819             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
820             velecsum         = _fjsp_add_v2r8(velecsum,velec);
821
822             fscal            = felec;
823
824             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
825
826             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
827
828             /* Update vectorial force */
829             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
830             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
831             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
832             
833             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
834             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
835             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
836
837             }
838
839             /**************************
840              * CALCULATE INTERACTIONS *
841              **************************/
842
843             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
844             {
845
846             /* REACTION-FIELD ELECTROSTATICS */
847             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
848             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
849
850             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
851
852             /* Update potential sum for this i atom from the interaction with this j atom. */
853             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
854             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
855             velecsum         = _fjsp_add_v2r8(velecsum,velec);
856
857             fscal            = felec;
858
859             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
860
861             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
862
863             /* Update vectorial force */
864             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
865             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
866             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
867             
868             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
869             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
870             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
871
872             }
873
874             /**************************
875              * CALCULATE INTERACTIONS *
876              **************************/
877
878             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
879             {
880
881             /* REACTION-FIELD ELECTROSTATICS */
882             velec            = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
883             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
884
885             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
886
887             /* Update potential sum for this i atom from the interaction with this j atom. */
888             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
889             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
890             velecsum         = _fjsp_add_v2r8(velecsum,velec);
891
892             fscal            = felec;
893
894             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
895
896             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
897
898             /* Update vectorial force */
899             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
900             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
901             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
902             
903             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
904             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
905             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
906
907             }
908
909             /**************************
910              * CALCULATE INTERACTIONS *
911              **************************/
912
913             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
914             {
915
916             /* REACTION-FIELD ELECTROSTATICS */
917             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
918             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
919
920             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
921
922             /* Update potential sum for this i atom from the interaction with this j atom. */
923             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
924             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
925             velecsum         = _fjsp_add_v2r8(velecsum,velec);
926
927             fscal            = felec;
928
929             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
930
931             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
932
933             /* Update vectorial force */
934             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
935             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
936             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
937             
938             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
939             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
940             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
941
942             }
943
944             /**************************
945              * CALCULATE INTERACTIONS *
946              **************************/
947
948             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
949             {
950
951             /* REACTION-FIELD ELECTROSTATICS */
952             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
953             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
954
955             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
956
957             /* Update potential sum for this i atom from the interaction with this j atom. */
958             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
959             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
960             velecsum         = _fjsp_add_v2r8(velecsum,velec);
961
962             fscal            = felec;
963
964             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
965
966             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
967
968             /* Update vectorial force */
969             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
970             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
971             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
972             
973             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
974             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
975             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
976
977             }
978
979             /**************************
980              * CALCULATE INTERACTIONS *
981              **************************/
982
983             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
984             {
985
986             /* REACTION-FIELD ELECTROSTATICS */
987             velec            = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
988             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
989
990             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
991
992             /* Update potential sum for this i atom from the interaction with this j atom. */
993             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
994             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
995             velecsum         = _fjsp_add_v2r8(velecsum,velec);
996
997             fscal            = felec;
998
999             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1000
1001             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1002
1003             /* Update vectorial force */
1004             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1005             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1006             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1007             
1008             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1009             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1010             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1011
1012             }
1013
1014             /**************************
1015              * CALCULATE INTERACTIONS *
1016              **************************/
1017
1018             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1019             {
1020
1021             /* REACTION-FIELD ELECTROSTATICS */
1022             velec            = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
1023             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1024
1025             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1026
1027             /* Update potential sum for this i atom from the interaction with this j atom. */
1028             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1029             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1030             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1031
1032             fscal            = felec;
1033
1034             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1035
1036             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1037
1038             /* Update vectorial force */
1039             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1040             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1041             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1042             
1043             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1044             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1045             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1046
1047             }
1048
1049             /**************************
1050              * CALCULATE INTERACTIONS *
1051              **************************/
1052
1053             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1054             {
1055
1056             /* REACTION-FIELD ELECTROSTATICS */
1057             velec            = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
1058             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1059
1060             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1061
1062             /* Update potential sum for this i atom from the interaction with this j atom. */
1063             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1064             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1065             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1066
1067             fscal            = felec;
1068
1069             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1070
1071             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1072
1073             /* Update vectorial force */
1074             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1075             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1076             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1077             
1078             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1079             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1080             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1081
1082             }
1083
1084             /**************************
1085              * CALCULATE INTERACTIONS *
1086              **************************/
1087
1088             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1089             {
1090
1091             /* REACTION-FIELD ELECTROSTATICS */
1092             velec            = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
1093             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1094
1095             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1096
1097             /* Update potential sum for this i atom from the interaction with this j atom. */
1098             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1099             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1100             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1101
1102             fscal            = felec;
1103
1104             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1105
1106             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1107
1108             /* Update vectorial force */
1109             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1110             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1111             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1112             
1113             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1114             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1115             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1116
1117             }
1118
1119             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1120
1121             /* Inner loop uses 416 flops */
1122         }
1123
1124         /* End of innermost loop */
1125
1126         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
1127                                               f+i_coord_offset,fshift+i_shift_offset);
1128
1129         ggid                        = gid[iidx];
1130         /* Update potential energies */
1131         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1132         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1133
1134         /* Increment number of inner iterations */
1135         inneriter                  += j_index_end - j_index_start;
1136
1137         /* Outer loop uses 26 flops */
1138     }
1139
1140     /* Increment number of outer iterations */
1141     outeriter        += nri;
1142
1143     /* Update outer/inner flops */
1144
1145     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*416);
1146 }
1147 /*
1148  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
1149  * Electrostatics interaction: ReactionField
1150  * VdW interaction:            LennardJones
1151  * Geometry:                   Water4-Water4
1152  * Calculate force/pot:        Force
1153  */
1154 void
1155 nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
1156                     (t_nblist                    * gmx_restrict       nlist,
1157                      rvec                        * gmx_restrict          xx,
1158                      rvec                        * gmx_restrict          ff,
1159                      t_forcerec                  * gmx_restrict          fr,
1160                      t_mdatoms                   * gmx_restrict     mdatoms,
1161                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1162                      t_nrnb                      * gmx_restrict        nrnb)
1163 {
1164     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1165      * just 0 for non-waters.
1166      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1167      * jnr indices corresponding to data put in the two positions in the SIMD register.
1168      */
1169     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1170     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1171     int              jnrA,jnrB;
1172     int              j_coord_offsetA,j_coord_offsetB;
1173     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1174     real             rcutoff_scalar;
1175     real             *shiftvec,*fshift,*x,*f;
1176     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1177     int              vdwioffset0;
1178     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1179     int              vdwioffset1;
1180     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1181     int              vdwioffset2;
1182     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1183     int              vdwioffset3;
1184     _fjsp_v2r8       ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
1185     int              vdwjidx0A,vdwjidx0B;
1186     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1187     int              vdwjidx1A,vdwjidx1B;
1188     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1189     int              vdwjidx2A,vdwjidx2B;
1190     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1191     int              vdwjidx3A,vdwjidx3B;
1192     _fjsp_v2r8       jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
1193     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1194     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1195     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1196     _fjsp_v2r8       dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
1197     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1198     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1199     _fjsp_v2r8       dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
1200     _fjsp_v2r8       dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
1201     _fjsp_v2r8       dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
1202     _fjsp_v2r8       dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
1203     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1204     real             *charge;
1205     int              nvdwtype;
1206     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1207     int              *vdwtype;
1208     real             *vdwparam;
1209     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1210     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1211     _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
1212     real             rswitch_scalar,d_scalar;
1213     _fjsp_v2r8       itab_tmp;
1214     _fjsp_v2r8       dummy_mask,cutoff_mask;
1215     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1216     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1217     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1218
1219     x                = xx[0];
1220     f                = ff[0];
1221
1222     nri              = nlist->nri;
1223     iinr             = nlist->iinr;
1224     jindex           = nlist->jindex;
1225     jjnr             = nlist->jjnr;
1226     shiftidx         = nlist->shift;
1227     gid              = nlist->gid;
1228     shiftvec         = fr->shift_vec[0];
1229     fshift           = fr->fshift[0];
1230     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1231     charge           = mdatoms->chargeA;
1232     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1233     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1234     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1235     nvdwtype         = fr->ntype;
1236     vdwparam         = fr->nbfp;
1237     vdwtype          = mdatoms->typeA;
1238
1239     /* Setup water-specific parameters */
1240     inr              = nlist->iinr[0];
1241     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1242     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1243     iq3              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
1244     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1245
1246     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1247     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1248     jq3              = gmx_fjsp_set1_v2r8(charge[inr+3]);
1249     vdwjidx0A        = 2*vdwtype[inr+0];
1250     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1251     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1252     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1253     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1254     qq13             = _fjsp_mul_v2r8(iq1,jq3);
1255     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1256     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1257     qq23             = _fjsp_mul_v2r8(iq2,jq3);
1258     qq31             = _fjsp_mul_v2r8(iq3,jq1);
1259     qq32             = _fjsp_mul_v2r8(iq3,jq2);
1260     qq33             = _fjsp_mul_v2r8(iq3,jq3);
1261
1262     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1263     rcutoff_scalar   = fr->rcoulomb;
1264     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1265     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
1266
1267     rswitch_scalar   = fr->rvdw_switch;
1268     rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
1269     /* Setup switch parameters */
1270     d_scalar         = rcutoff_scalar-rswitch_scalar;
1271     d                = gmx_fjsp_set1_v2r8(d_scalar);
1272     swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
1273     swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
1274     swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
1275     swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
1276     swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
1277     swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
1278
1279     /* Avoid stupid compiler warnings */
1280     jnrA = jnrB = 0;
1281     j_coord_offsetA = 0;
1282     j_coord_offsetB = 0;
1283
1284     outeriter        = 0;
1285     inneriter        = 0;
1286
1287     /* Start outer loop over neighborlists */
1288     for(iidx=0; iidx<nri; iidx++)
1289     {
1290         /* Load shift vector for this list */
1291         i_shift_offset   = DIM*shiftidx[iidx];
1292
1293         /* Load limits for loop over neighbors */
1294         j_index_start    = jindex[iidx];
1295         j_index_end      = jindex[iidx+1];
1296
1297         /* Get outer coordinate index */
1298         inr              = iinr[iidx];
1299         i_coord_offset   = DIM*inr;
1300
1301         /* Load i particle coords and add shift vector */
1302         gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1303                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
1304
1305         fix0             = _fjsp_setzero_v2r8();
1306         fiy0             = _fjsp_setzero_v2r8();
1307         fiz0             = _fjsp_setzero_v2r8();
1308         fix1             = _fjsp_setzero_v2r8();
1309         fiy1             = _fjsp_setzero_v2r8();
1310         fiz1             = _fjsp_setzero_v2r8();
1311         fix2             = _fjsp_setzero_v2r8();
1312         fiy2             = _fjsp_setzero_v2r8();
1313         fiz2             = _fjsp_setzero_v2r8();
1314         fix3             = _fjsp_setzero_v2r8();
1315         fiy3             = _fjsp_setzero_v2r8();
1316         fiz3             = _fjsp_setzero_v2r8();
1317
1318         /* Start inner kernel loop */
1319         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1320         {
1321
1322             /* Get j neighbor index, and coordinate index */
1323             jnrA             = jjnr[jidx];
1324             jnrB             = jjnr[jidx+1];
1325             j_coord_offsetA  = DIM*jnrA;
1326             j_coord_offsetB  = DIM*jnrB;
1327
1328             /* load j atom coordinates */
1329             gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1330                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1331                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1332
1333             /* Calculate displacement vector */
1334             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1335             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1336             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1337             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1338             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1339             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1340             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1341             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1342             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1343             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1344             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1345             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1346             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1347             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1348             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1349             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1350             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1351             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1352             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1353             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1354             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1355             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1356             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1357             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1358             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1359             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1360             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1361             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1362             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1363             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1364
1365             /* Calculate squared distance and things based on it */
1366             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1367             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1368             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1369             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1370             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1371             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1372             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1373             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1374             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1375             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1376
1377             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1378             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1379             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1380             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1381             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1382             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1383             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1384             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1385             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1386             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1387
1388             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1389             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1390             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1391             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1392             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1393             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1394             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1395             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1396             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1397             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1398
1399             fjx0             = _fjsp_setzero_v2r8();
1400             fjy0             = _fjsp_setzero_v2r8();
1401             fjz0             = _fjsp_setzero_v2r8();
1402             fjx1             = _fjsp_setzero_v2r8();
1403             fjy1             = _fjsp_setzero_v2r8();
1404             fjz1             = _fjsp_setzero_v2r8();
1405             fjx2             = _fjsp_setzero_v2r8();
1406             fjy2             = _fjsp_setzero_v2r8();
1407             fjz2             = _fjsp_setzero_v2r8();
1408             fjx3             = _fjsp_setzero_v2r8();
1409             fjy3             = _fjsp_setzero_v2r8();
1410             fjz3             = _fjsp_setzero_v2r8();
1411
1412             /**************************
1413              * CALCULATE INTERACTIONS *
1414              **************************/
1415
1416             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1417             {
1418
1419             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1420
1421             /* LENNARD-JONES DISPERSION/REPULSION */
1422
1423             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1424             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
1425             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
1426             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
1427             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
1428
1429             d                = _fjsp_sub_v2r8(r00,rswitch);
1430             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
1431             d2               = _fjsp_mul_v2r8(d,d);
1432             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
1433
1434             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
1435
1436             /* Evaluate switch function */
1437             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1438             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
1439             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1440
1441             fscal            = fvdw;
1442
1443             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1444
1445             /* Update vectorial force */
1446             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1447             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1448             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1449             
1450             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1451             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1452             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1453
1454             }
1455
1456             /**************************
1457              * CALCULATE INTERACTIONS *
1458              **************************/
1459
1460             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1461             {
1462
1463             /* REACTION-FIELD ELECTROSTATICS */
1464             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1465
1466             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1467
1468             fscal            = felec;
1469
1470             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1471
1472             /* Update vectorial force */
1473             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1474             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1475             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1476             
1477             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1478             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1479             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1480
1481             }
1482
1483             /**************************
1484              * CALCULATE INTERACTIONS *
1485              **************************/
1486
1487             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1488             {
1489
1490             /* REACTION-FIELD ELECTROSTATICS */
1491             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1492
1493             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1494
1495             fscal            = felec;
1496
1497             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1498
1499             /* Update vectorial force */
1500             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1501             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1502             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1503             
1504             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1505             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1506             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1507
1508             }
1509
1510             /**************************
1511              * CALCULATE INTERACTIONS *
1512              **************************/
1513
1514             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1515             {
1516
1517             /* REACTION-FIELD ELECTROSTATICS */
1518             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1519
1520             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1521
1522             fscal            = felec;
1523
1524             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1525
1526             /* Update vectorial force */
1527             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1528             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1529             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1530             
1531             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1532             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1533             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1534
1535             }
1536
1537             /**************************
1538              * CALCULATE INTERACTIONS *
1539              **************************/
1540
1541             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1542             {
1543
1544             /* REACTION-FIELD ELECTROSTATICS */
1545             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1546
1547             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1548
1549             fscal            = felec;
1550
1551             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1552
1553             /* Update vectorial force */
1554             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1555             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1556             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1557             
1558             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1559             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1560             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1561
1562             }
1563
1564             /**************************
1565              * CALCULATE INTERACTIONS *
1566              **************************/
1567
1568             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1569             {
1570
1571             /* REACTION-FIELD ELECTROSTATICS */
1572             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1573
1574             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1575
1576             fscal            = felec;
1577
1578             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1579
1580             /* Update vectorial force */
1581             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1582             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1583             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1584             
1585             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1586             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1587             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1588
1589             }
1590
1591             /**************************
1592              * CALCULATE INTERACTIONS *
1593              **************************/
1594
1595             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1596             {
1597
1598             /* REACTION-FIELD ELECTROSTATICS */
1599             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1600
1601             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1602
1603             fscal            = felec;
1604
1605             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1606
1607             /* Update vectorial force */
1608             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
1609             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
1610             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
1611             
1612             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
1613             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
1614             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
1615
1616             }
1617
1618             /**************************
1619              * CALCULATE INTERACTIONS *
1620              **************************/
1621
1622             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
1623             {
1624
1625             /* REACTION-FIELD ELECTROSTATICS */
1626             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
1627
1628             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
1629
1630             fscal            = felec;
1631
1632             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1633
1634             /* Update vectorial force */
1635             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
1636             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
1637             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
1638             
1639             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
1640             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
1641             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
1642
1643             }
1644
1645             /**************************
1646              * CALCULATE INTERACTIONS *
1647              **************************/
1648
1649             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
1650             {
1651
1652             /* REACTION-FIELD ELECTROSTATICS */
1653             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
1654
1655             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
1656
1657             fscal            = felec;
1658
1659             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1660
1661             /* Update vectorial force */
1662             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
1663             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
1664             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
1665             
1666             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
1667             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
1668             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
1669
1670             }
1671
1672             /**************************
1673              * CALCULATE INTERACTIONS *
1674              **************************/
1675
1676             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
1677             {
1678
1679             /* REACTION-FIELD ELECTROSTATICS */
1680             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
1681
1682             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
1683
1684             fscal            = felec;
1685
1686             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1687
1688             /* Update vectorial force */
1689             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
1690             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
1691             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
1692             
1693             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
1694             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
1695             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
1696
1697             }
1698
1699             gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
1700
1701             /* Inner loop uses 359 flops */
1702         }
1703
1704         if(jidx<j_index_end)
1705         {
1706
1707             jnrA             = jjnr[jidx];
1708             j_coord_offsetA  = DIM*jnrA;
1709
1710             /* load j atom coordinates */
1711             gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1712                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
1713                                               &jy2,&jz2,&jx3,&jy3,&jz3);
1714
1715             /* Calculate displacement vector */
1716             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1717             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1718             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1719             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1720             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1721             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1722             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1723             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1724             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1725             dx13             = _fjsp_sub_v2r8(ix1,jx3);
1726             dy13             = _fjsp_sub_v2r8(iy1,jy3);
1727             dz13             = _fjsp_sub_v2r8(iz1,jz3);
1728             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1729             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1730             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1731             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1732             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1733             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1734             dx23             = _fjsp_sub_v2r8(ix2,jx3);
1735             dy23             = _fjsp_sub_v2r8(iy2,jy3);
1736             dz23             = _fjsp_sub_v2r8(iz2,jz3);
1737             dx31             = _fjsp_sub_v2r8(ix3,jx1);
1738             dy31             = _fjsp_sub_v2r8(iy3,jy1);
1739             dz31             = _fjsp_sub_v2r8(iz3,jz1);
1740             dx32             = _fjsp_sub_v2r8(ix3,jx2);
1741             dy32             = _fjsp_sub_v2r8(iy3,jy2);
1742             dz32             = _fjsp_sub_v2r8(iz3,jz2);
1743             dx33             = _fjsp_sub_v2r8(ix3,jx3);
1744             dy33             = _fjsp_sub_v2r8(iy3,jy3);
1745             dz33             = _fjsp_sub_v2r8(iz3,jz3);
1746
1747             /* Calculate squared distance and things based on it */
1748             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1749             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1750             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1751             rsq13            = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
1752             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1753             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1754             rsq23            = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
1755             rsq31            = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
1756             rsq32            = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
1757             rsq33            = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
1758
1759             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1760             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1761             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1762             rinv13           = gmx_fjsp_invsqrt_v2r8(rsq13);
1763             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1764             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1765             rinv23           = gmx_fjsp_invsqrt_v2r8(rsq23);
1766             rinv31           = gmx_fjsp_invsqrt_v2r8(rsq31);
1767             rinv32           = gmx_fjsp_invsqrt_v2r8(rsq32);
1768             rinv33           = gmx_fjsp_invsqrt_v2r8(rsq33);
1769
1770             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1771             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1772             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1773             rinvsq13         = _fjsp_mul_v2r8(rinv13,rinv13);
1774             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1775             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1776             rinvsq23         = _fjsp_mul_v2r8(rinv23,rinv23);
1777             rinvsq31         = _fjsp_mul_v2r8(rinv31,rinv31);
1778             rinvsq32         = _fjsp_mul_v2r8(rinv32,rinv32);
1779             rinvsq33         = _fjsp_mul_v2r8(rinv33,rinv33);
1780
1781             fjx0             = _fjsp_setzero_v2r8();
1782             fjy0             = _fjsp_setzero_v2r8();
1783             fjz0             = _fjsp_setzero_v2r8();
1784             fjx1             = _fjsp_setzero_v2r8();
1785             fjy1             = _fjsp_setzero_v2r8();
1786             fjz1             = _fjsp_setzero_v2r8();
1787             fjx2             = _fjsp_setzero_v2r8();
1788             fjy2             = _fjsp_setzero_v2r8();
1789             fjz2             = _fjsp_setzero_v2r8();
1790             fjx3             = _fjsp_setzero_v2r8();
1791             fjy3             = _fjsp_setzero_v2r8();
1792             fjz3             = _fjsp_setzero_v2r8();
1793
1794             /**************************
1795              * CALCULATE INTERACTIONS *
1796              **************************/
1797
1798             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1799             {
1800
1801             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1802
1803             /* LENNARD-JONES DISPERSION/REPULSION */
1804
1805             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1806             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
1807             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
1808             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
1809             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
1810
1811             d                = _fjsp_sub_v2r8(r00,rswitch);
1812             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
1813             d2               = _fjsp_mul_v2r8(d,d);
1814             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
1815
1816             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
1817
1818             /* Evaluate switch function */
1819             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1820             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
1821             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1822
1823             fscal            = fvdw;
1824
1825             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1826
1827             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1828
1829             /* Update vectorial force */
1830             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1831             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1832             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1833             
1834             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1835             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1836             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1837
1838             }
1839
1840             /**************************
1841              * CALCULATE INTERACTIONS *
1842              **************************/
1843
1844             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1845             {
1846
1847             /* REACTION-FIELD ELECTROSTATICS */
1848             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1849
1850             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1851
1852             fscal            = felec;
1853
1854             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1855
1856             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1857
1858             /* Update vectorial force */
1859             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1860             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1861             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1862             
1863             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1864             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1865             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1866
1867             }
1868
1869             /**************************
1870              * CALCULATE INTERACTIONS *
1871              **************************/
1872
1873             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1874             {
1875
1876             /* REACTION-FIELD ELECTROSTATICS */
1877             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1878
1879             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1880
1881             fscal            = felec;
1882
1883             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1884
1885             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1886
1887             /* Update vectorial force */
1888             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1889             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1890             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1891             
1892             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1893             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1894             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1895
1896             }
1897
1898             /**************************
1899              * CALCULATE INTERACTIONS *
1900              **************************/
1901
1902             if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
1903             {
1904
1905             /* REACTION-FIELD ELECTROSTATICS */
1906             felec            = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
1907
1908             cutoff_mask      = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
1909
1910             fscal            = felec;
1911
1912             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1913
1914             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1915
1916             /* Update vectorial force */
1917             fix1             = _fjsp_madd_v2r8(dx13,fscal,fix1);
1918             fiy1             = _fjsp_madd_v2r8(dy13,fscal,fiy1);
1919             fiz1             = _fjsp_madd_v2r8(dz13,fscal,fiz1);
1920             
1921             fjx3             = _fjsp_madd_v2r8(dx13,fscal,fjx3);
1922             fjy3             = _fjsp_madd_v2r8(dy13,fscal,fjy3);
1923             fjz3             = _fjsp_madd_v2r8(dz13,fscal,fjz3);
1924
1925             }
1926
1927             /**************************
1928              * CALCULATE INTERACTIONS *
1929              **************************/
1930
1931             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1932             {
1933
1934             /* REACTION-FIELD ELECTROSTATICS */
1935             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1936
1937             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1938
1939             fscal            = felec;
1940
1941             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1942
1943             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1944
1945             /* Update vectorial force */
1946             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1947             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1948             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1949             
1950             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1951             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1952             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1953
1954             }
1955
1956             /**************************
1957              * CALCULATE INTERACTIONS *
1958              **************************/
1959
1960             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1961             {
1962
1963             /* REACTION-FIELD ELECTROSTATICS */
1964             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1965
1966             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1967
1968             fscal            = felec;
1969
1970             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1971
1972             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1973
1974             /* Update vectorial force */
1975             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1976             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1977             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1978             
1979             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1980             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1981             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1982
1983             }
1984
1985             /**************************
1986              * CALCULATE INTERACTIONS *
1987              **************************/
1988
1989             if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
1990             {
1991
1992             /* REACTION-FIELD ELECTROSTATICS */
1993             felec            = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
1994
1995             cutoff_mask      = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
1996
1997             fscal            = felec;
1998
1999             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2000
2001             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2002
2003             /* Update vectorial force */
2004             fix2             = _fjsp_madd_v2r8(dx23,fscal,fix2);
2005             fiy2             = _fjsp_madd_v2r8(dy23,fscal,fiy2);
2006             fiz2             = _fjsp_madd_v2r8(dz23,fscal,fiz2);
2007             
2008             fjx3             = _fjsp_madd_v2r8(dx23,fscal,fjx3);
2009             fjy3             = _fjsp_madd_v2r8(dy23,fscal,fjy3);
2010             fjz3             = _fjsp_madd_v2r8(dz23,fscal,fjz3);
2011
2012             }
2013
2014             /**************************
2015              * CALCULATE INTERACTIONS *
2016              **************************/
2017
2018             if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
2019             {
2020
2021             /* REACTION-FIELD ELECTROSTATICS */
2022             felec            = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
2023
2024             cutoff_mask      = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
2025
2026             fscal            = felec;
2027
2028             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2029
2030             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2031
2032             /* Update vectorial force */
2033             fix3             = _fjsp_madd_v2r8(dx31,fscal,fix3);
2034             fiy3             = _fjsp_madd_v2r8(dy31,fscal,fiy3);
2035             fiz3             = _fjsp_madd_v2r8(dz31,fscal,fiz3);
2036             
2037             fjx1             = _fjsp_madd_v2r8(dx31,fscal,fjx1);
2038             fjy1             = _fjsp_madd_v2r8(dy31,fscal,fjy1);
2039             fjz1             = _fjsp_madd_v2r8(dz31,fscal,fjz1);
2040
2041             }
2042
2043             /**************************
2044              * CALCULATE INTERACTIONS *
2045              **************************/
2046
2047             if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
2048             {
2049
2050             /* REACTION-FIELD ELECTROSTATICS */
2051             felec            = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
2052
2053             cutoff_mask      = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
2054
2055             fscal            = felec;
2056
2057             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2058
2059             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2060
2061             /* Update vectorial force */
2062             fix3             = _fjsp_madd_v2r8(dx32,fscal,fix3);
2063             fiy3             = _fjsp_madd_v2r8(dy32,fscal,fiy3);
2064             fiz3             = _fjsp_madd_v2r8(dz32,fscal,fiz3);
2065             
2066             fjx2             = _fjsp_madd_v2r8(dx32,fscal,fjx2);
2067             fjy2             = _fjsp_madd_v2r8(dy32,fscal,fjy2);
2068             fjz2             = _fjsp_madd_v2r8(dz32,fscal,fjz2);
2069
2070             }
2071
2072             /**************************
2073              * CALCULATE INTERACTIONS *
2074              **************************/
2075
2076             if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
2077             {
2078
2079             /* REACTION-FIELD ELECTROSTATICS */
2080             felec            = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
2081
2082             cutoff_mask      = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
2083
2084             fscal            = felec;
2085
2086             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
2087
2088             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2089
2090             /* Update vectorial force */
2091             fix3             = _fjsp_madd_v2r8(dx33,fscal,fix3);
2092             fiy3             = _fjsp_madd_v2r8(dy33,fscal,fiy3);
2093             fiz3             = _fjsp_madd_v2r8(dz33,fscal,fiz3);
2094             
2095             fjx3             = _fjsp_madd_v2r8(dx33,fscal,fjx3);
2096             fjy3             = _fjsp_madd_v2r8(dy33,fscal,fjy3);
2097             fjz3             = _fjsp_madd_v2r8(dz33,fscal,fjz3);
2098
2099             }
2100
2101             gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
2102
2103             /* Inner loop uses 359 flops */
2104         }
2105
2106         /* End of innermost loop */
2107
2108         gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
2109                                               f+i_coord_offset,fshift+i_shift_offset);
2110
2111         /* Increment number of inner iterations */
2112         inneriter                  += j_index_end - j_index_start;
2113
2114         /* Outer loop uses 24 flops */
2115     }
2116
2117     /* Increment number of outer iterations */
2118     outeriter        += nri;
2119
2120     /* Update outer/inner flops */
2121
2122     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*359);
2123 }