a3bf11486194ad0e58e9d3a7fff2130c5953ad51
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "nrnb.h"
46
47 #include "kernelutil_sparc64_hpc_ace_double.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
51  * Electrostatics interaction: ReactionField
52  * VdW interaction:            LennardJones
53  * Geometry:                   Water3-Water3
54  * Calculate force/pot:        PotentialAndForce
55  */
56 void
57 nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67      * just 0 for non-waters.
68      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69      * jnr indices corresponding to data put in the two positions in the SIMD register.
70      */
71     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
72     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
73     int              jnrA,jnrB;
74     int              j_coord_offsetA,j_coord_offsetB;
75     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
76     real             rcutoff_scalar;
77     real             *shiftvec,*fshift,*x,*f;
78     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
79     int              vdwioffset0;
80     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
81     int              vdwioffset1;
82     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
83     int              vdwioffset2;
84     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85     int              vdwjidx0A,vdwjidx0B;
86     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87     int              vdwjidx1A,vdwjidx1B;
88     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89     int              vdwjidx2A,vdwjidx2B;
90     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
101     real             *charge;
102     int              nvdwtype;
103     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
104     int              *vdwtype;
105     real             *vdwparam;
106     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
107     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
108     _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
109     real             rswitch_scalar,d_scalar;
110     _fjsp_v2r8       itab_tmp;
111     _fjsp_v2r8       dummy_mask,cutoff_mask;
112     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
113     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
114     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
115
116     x                = xx[0];
117     f                = ff[0];
118
119     nri              = nlist->nri;
120     iinr             = nlist->iinr;
121     jindex           = nlist->jindex;
122     jjnr             = nlist->jjnr;
123     shiftidx         = nlist->shift;
124     gid              = nlist->gid;
125     shiftvec         = fr->shift_vec[0];
126     fshift           = fr->fshift[0];
127     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
128     charge           = mdatoms->chargeA;
129     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
130     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
131     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
132     nvdwtype         = fr->ntype;
133     vdwparam         = fr->nbfp;
134     vdwtype          = mdatoms->typeA;
135
136     /* Setup water-specific parameters */
137     inr              = nlist->iinr[0];
138     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
139     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
140     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
141     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
142
143     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
144     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
145     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
146     vdwjidx0A        = 2*vdwtype[inr+0];
147     qq00             = _fjsp_mul_v2r8(iq0,jq0);
148     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
149     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
150     qq01             = _fjsp_mul_v2r8(iq0,jq1);
151     qq02             = _fjsp_mul_v2r8(iq0,jq2);
152     qq10             = _fjsp_mul_v2r8(iq1,jq0);
153     qq11             = _fjsp_mul_v2r8(iq1,jq1);
154     qq12             = _fjsp_mul_v2r8(iq1,jq2);
155     qq20             = _fjsp_mul_v2r8(iq2,jq0);
156     qq21             = _fjsp_mul_v2r8(iq2,jq1);
157     qq22             = _fjsp_mul_v2r8(iq2,jq2);
158
159     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
160     rcutoff_scalar   = fr->rcoulomb;
161     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
162     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
163
164     rswitch_scalar   = fr->rvdw_switch;
165     rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
166     /* Setup switch parameters */
167     d_scalar         = rcutoff_scalar-rswitch_scalar;
168     d                = gmx_fjsp_set1_v2r8(d_scalar);
169     swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
170     swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
171     swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
172     swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
173     swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
174     swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
175
176     /* Avoid stupid compiler warnings */
177     jnrA = jnrB = 0;
178     j_coord_offsetA = 0;
179     j_coord_offsetB = 0;
180
181     outeriter        = 0;
182     inneriter        = 0;
183
184     /* Start outer loop over neighborlists */
185     for(iidx=0; iidx<nri; iidx++)
186     {
187         /* Load shift vector for this list */
188         i_shift_offset   = DIM*shiftidx[iidx];
189
190         /* Load limits for loop over neighbors */
191         j_index_start    = jindex[iidx];
192         j_index_end      = jindex[iidx+1];
193
194         /* Get outer coordinate index */
195         inr              = iinr[iidx];
196         i_coord_offset   = DIM*inr;
197
198         /* Load i particle coords and add shift vector */
199         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
200                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
201
202         fix0             = _fjsp_setzero_v2r8();
203         fiy0             = _fjsp_setzero_v2r8();
204         fiz0             = _fjsp_setzero_v2r8();
205         fix1             = _fjsp_setzero_v2r8();
206         fiy1             = _fjsp_setzero_v2r8();
207         fiz1             = _fjsp_setzero_v2r8();
208         fix2             = _fjsp_setzero_v2r8();
209         fiy2             = _fjsp_setzero_v2r8();
210         fiz2             = _fjsp_setzero_v2r8();
211
212         /* Reset potential sums */
213         velecsum         = _fjsp_setzero_v2r8();
214         vvdwsum          = _fjsp_setzero_v2r8();
215
216         /* Start inner kernel loop */
217         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
218         {
219
220             /* Get j neighbor index, and coordinate index */
221             jnrA             = jjnr[jidx];
222             jnrB             = jjnr[jidx+1];
223             j_coord_offsetA  = DIM*jnrA;
224             j_coord_offsetB  = DIM*jnrB;
225
226             /* load j atom coordinates */
227             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
228                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
229
230             /* Calculate displacement vector */
231             dx00             = _fjsp_sub_v2r8(ix0,jx0);
232             dy00             = _fjsp_sub_v2r8(iy0,jy0);
233             dz00             = _fjsp_sub_v2r8(iz0,jz0);
234             dx01             = _fjsp_sub_v2r8(ix0,jx1);
235             dy01             = _fjsp_sub_v2r8(iy0,jy1);
236             dz01             = _fjsp_sub_v2r8(iz0,jz1);
237             dx02             = _fjsp_sub_v2r8(ix0,jx2);
238             dy02             = _fjsp_sub_v2r8(iy0,jy2);
239             dz02             = _fjsp_sub_v2r8(iz0,jz2);
240             dx10             = _fjsp_sub_v2r8(ix1,jx0);
241             dy10             = _fjsp_sub_v2r8(iy1,jy0);
242             dz10             = _fjsp_sub_v2r8(iz1,jz0);
243             dx11             = _fjsp_sub_v2r8(ix1,jx1);
244             dy11             = _fjsp_sub_v2r8(iy1,jy1);
245             dz11             = _fjsp_sub_v2r8(iz1,jz1);
246             dx12             = _fjsp_sub_v2r8(ix1,jx2);
247             dy12             = _fjsp_sub_v2r8(iy1,jy2);
248             dz12             = _fjsp_sub_v2r8(iz1,jz2);
249             dx20             = _fjsp_sub_v2r8(ix2,jx0);
250             dy20             = _fjsp_sub_v2r8(iy2,jy0);
251             dz20             = _fjsp_sub_v2r8(iz2,jz0);
252             dx21             = _fjsp_sub_v2r8(ix2,jx1);
253             dy21             = _fjsp_sub_v2r8(iy2,jy1);
254             dz21             = _fjsp_sub_v2r8(iz2,jz1);
255             dx22             = _fjsp_sub_v2r8(ix2,jx2);
256             dy22             = _fjsp_sub_v2r8(iy2,jy2);
257             dz22             = _fjsp_sub_v2r8(iz2,jz2);
258
259             /* Calculate squared distance and things based on it */
260             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
261             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
262             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
263             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
264             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
265             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
266             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
267             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
268             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
269
270             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
271             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
272             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
273             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
274             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
275             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
276             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
277             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
278             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
279
280             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
281             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
282             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
283             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
284             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
285             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
286             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
287             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
288             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
289
290             fjx0             = _fjsp_setzero_v2r8();
291             fjy0             = _fjsp_setzero_v2r8();
292             fjz0             = _fjsp_setzero_v2r8();
293             fjx1             = _fjsp_setzero_v2r8();
294             fjy1             = _fjsp_setzero_v2r8();
295             fjz1             = _fjsp_setzero_v2r8();
296             fjx2             = _fjsp_setzero_v2r8();
297             fjy2             = _fjsp_setzero_v2r8();
298             fjz2             = _fjsp_setzero_v2r8();
299
300             /**************************
301              * CALCULATE INTERACTIONS *
302              **************************/
303
304             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
305             {
306
307             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
308
309             /* REACTION-FIELD ELECTROSTATICS */
310             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
311             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
312
313             /* LENNARD-JONES DISPERSION/REPULSION */
314
315             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
316             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
317             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
318             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
319             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
320
321             d                = _fjsp_sub_v2r8(r00,rswitch);
322             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
323             d2               = _fjsp_mul_v2r8(d,d);
324             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
325
326             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
327
328             /* Evaluate switch function */
329             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
330             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
331             vvdw             = _fjsp_mul_v2r8(vvdw,sw);
332             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
333
334             /* Update potential sum for this i atom from the interaction with this j atom. */
335             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
336             velecsum         = _fjsp_add_v2r8(velecsum,velec);
337             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
338             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
339
340             fscal            = _fjsp_add_v2r8(felec,fvdw);
341
342             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
343
344             /* Update vectorial force */
345             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
346             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
347             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
348             
349             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
350             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
351             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
352
353             }
354
355             /**************************
356              * CALCULATE INTERACTIONS *
357              **************************/
358
359             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
360             {
361
362             /* REACTION-FIELD ELECTROSTATICS */
363             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
364             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
365
366             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
367
368             /* Update potential sum for this i atom from the interaction with this j atom. */
369             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
370             velecsum         = _fjsp_add_v2r8(velecsum,velec);
371
372             fscal            = felec;
373
374             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
375
376             /* Update vectorial force */
377             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
378             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
379             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
380             
381             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
382             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
383             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
384
385             }
386
387             /**************************
388              * CALCULATE INTERACTIONS *
389              **************************/
390
391             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
392             {
393
394             /* REACTION-FIELD ELECTROSTATICS */
395             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
396             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
397
398             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
399
400             /* Update potential sum for this i atom from the interaction with this j atom. */
401             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
402             velecsum         = _fjsp_add_v2r8(velecsum,velec);
403
404             fscal            = felec;
405
406             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
407
408             /* Update vectorial force */
409             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
410             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
411             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
412             
413             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
414             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
415             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
416
417             }
418
419             /**************************
420              * CALCULATE INTERACTIONS *
421              **************************/
422
423             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
424             {
425
426             /* REACTION-FIELD ELECTROSTATICS */
427             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
428             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
429
430             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
431
432             /* Update potential sum for this i atom from the interaction with this j atom. */
433             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
434             velecsum         = _fjsp_add_v2r8(velecsum,velec);
435
436             fscal            = felec;
437
438             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
439
440             /* Update vectorial force */
441             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
442             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
443             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
444             
445             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
446             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
447             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
448
449             }
450
451             /**************************
452              * CALCULATE INTERACTIONS *
453              **************************/
454
455             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
456             {
457
458             /* REACTION-FIELD ELECTROSTATICS */
459             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
460             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
461
462             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
463
464             /* Update potential sum for this i atom from the interaction with this j atom. */
465             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
466             velecsum         = _fjsp_add_v2r8(velecsum,velec);
467
468             fscal            = felec;
469
470             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
471
472             /* Update vectorial force */
473             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
474             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
475             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
476             
477             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
478             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
479             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
480
481             }
482
483             /**************************
484              * CALCULATE INTERACTIONS *
485              **************************/
486
487             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
488             {
489
490             /* REACTION-FIELD ELECTROSTATICS */
491             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
492             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
493
494             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
495
496             /* Update potential sum for this i atom from the interaction with this j atom. */
497             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
498             velecsum         = _fjsp_add_v2r8(velecsum,velec);
499
500             fscal            = felec;
501
502             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
503
504             /* Update vectorial force */
505             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
506             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
507             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
508             
509             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
510             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
511             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
512
513             }
514
515             /**************************
516              * CALCULATE INTERACTIONS *
517              **************************/
518
519             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
520             {
521
522             /* REACTION-FIELD ELECTROSTATICS */
523             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
524             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
525
526             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
527
528             /* Update potential sum for this i atom from the interaction with this j atom. */
529             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
530             velecsum         = _fjsp_add_v2r8(velecsum,velec);
531
532             fscal            = felec;
533
534             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
535
536             /* Update vectorial force */
537             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
538             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
539             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
540             
541             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
542             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
543             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
544
545             }
546
547             /**************************
548              * CALCULATE INTERACTIONS *
549              **************************/
550
551             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
552             {
553
554             /* REACTION-FIELD ELECTROSTATICS */
555             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
556             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
557
558             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
559
560             /* Update potential sum for this i atom from the interaction with this j atom. */
561             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
562             velecsum         = _fjsp_add_v2r8(velecsum,velec);
563
564             fscal            = felec;
565
566             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
567
568             /* Update vectorial force */
569             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
570             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
571             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
572             
573             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
574             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
575             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
576
577             }
578
579             /**************************
580              * CALCULATE INTERACTIONS *
581              **************************/
582
583             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
584             {
585
586             /* REACTION-FIELD ELECTROSTATICS */
587             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
588             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
589
590             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
591
592             /* Update potential sum for this i atom from the interaction with this j atom. */
593             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
594             velecsum         = _fjsp_add_v2r8(velecsum,velec);
595
596             fscal            = felec;
597
598             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
599
600             /* Update vectorial force */
601             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
602             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
603             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
604             
605             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
606             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
607             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
608
609             }
610
611             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
612
613             /* Inner loop uses 385 flops */
614         }
615
616         if(jidx<j_index_end)
617         {
618
619             jnrA             = jjnr[jidx];
620             j_coord_offsetA  = DIM*jnrA;
621
622             /* load j atom coordinates */
623             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
624                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
625
626             /* Calculate displacement vector */
627             dx00             = _fjsp_sub_v2r8(ix0,jx0);
628             dy00             = _fjsp_sub_v2r8(iy0,jy0);
629             dz00             = _fjsp_sub_v2r8(iz0,jz0);
630             dx01             = _fjsp_sub_v2r8(ix0,jx1);
631             dy01             = _fjsp_sub_v2r8(iy0,jy1);
632             dz01             = _fjsp_sub_v2r8(iz0,jz1);
633             dx02             = _fjsp_sub_v2r8(ix0,jx2);
634             dy02             = _fjsp_sub_v2r8(iy0,jy2);
635             dz02             = _fjsp_sub_v2r8(iz0,jz2);
636             dx10             = _fjsp_sub_v2r8(ix1,jx0);
637             dy10             = _fjsp_sub_v2r8(iy1,jy0);
638             dz10             = _fjsp_sub_v2r8(iz1,jz0);
639             dx11             = _fjsp_sub_v2r8(ix1,jx1);
640             dy11             = _fjsp_sub_v2r8(iy1,jy1);
641             dz11             = _fjsp_sub_v2r8(iz1,jz1);
642             dx12             = _fjsp_sub_v2r8(ix1,jx2);
643             dy12             = _fjsp_sub_v2r8(iy1,jy2);
644             dz12             = _fjsp_sub_v2r8(iz1,jz2);
645             dx20             = _fjsp_sub_v2r8(ix2,jx0);
646             dy20             = _fjsp_sub_v2r8(iy2,jy0);
647             dz20             = _fjsp_sub_v2r8(iz2,jz0);
648             dx21             = _fjsp_sub_v2r8(ix2,jx1);
649             dy21             = _fjsp_sub_v2r8(iy2,jy1);
650             dz21             = _fjsp_sub_v2r8(iz2,jz1);
651             dx22             = _fjsp_sub_v2r8(ix2,jx2);
652             dy22             = _fjsp_sub_v2r8(iy2,jy2);
653             dz22             = _fjsp_sub_v2r8(iz2,jz2);
654
655             /* Calculate squared distance and things based on it */
656             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
657             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
658             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
659             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
660             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
661             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
662             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
663             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
664             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
665
666             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
667             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
668             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
669             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
670             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
671             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
672             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
673             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
674             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
675
676             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
677             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
678             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
679             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
680             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
681             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
682             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
683             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
684             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
685
686             fjx0             = _fjsp_setzero_v2r8();
687             fjy0             = _fjsp_setzero_v2r8();
688             fjz0             = _fjsp_setzero_v2r8();
689             fjx1             = _fjsp_setzero_v2r8();
690             fjy1             = _fjsp_setzero_v2r8();
691             fjz1             = _fjsp_setzero_v2r8();
692             fjx2             = _fjsp_setzero_v2r8();
693             fjy2             = _fjsp_setzero_v2r8();
694             fjz2             = _fjsp_setzero_v2r8();
695
696             /**************************
697              * CALCULATE INTERACTIONS *
698              **************************/
699
700             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
701             {
702
703             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
704
705             /* REACTION-FIELD ELECTROSTATICS */
706             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
707             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
708
709             /* LENNARD-JONES DISPERSION/REPULSION */
710
711             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
712             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
713             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
714             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
715             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
716
717             d                = _fjsp_sub_v2r8(r00,rswitch);
718             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
719             d2               = _fjsp_mul_v2r8(d,d);
720             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
721
722             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
723
724             /* Evaluate switch function */
725             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
726             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
727             vvdw             = _fjsp_mul_v2r8(vvdw,sw);
728             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
729
730             /* Update potential sum for this i atom from the interaction with this j atom. */
731             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
732             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
733             velecsum         = _fjsp_add_v2r8(velecsum,velec);
734             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
735             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
736             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
737
738             fscal            = _fjsp_add_v2r8(felec,fvdw);
739
740             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
741
742             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
743
744             /* Update vectorial force */
745             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
746             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
747             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
748             
749             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
750             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
751             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
752
753             }
754
755             /**************************
756              * CALCULATE INTERACTIONS *
757              **************************/
758
759             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
760             {
761
762             /* REACTION-FIELD ELECTROSTATICS */
763             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
764             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
765
766             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
767
768             /* Update potential sum for this i atom from the interaction with this j atom. */
769             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
770             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
771             velecsum         = _fjsp_add_v2r8(velecsum,velec);
772
773             fscal            = felec;
774
775             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
776
777             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
778
779             /* Update vectorial force */
780             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
781             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
782             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
783             
784             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
785             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
786             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
787
788             }
789
790             /**************************
791              * CALCULATE INTERACTIONS *
792              **************************/
793
794             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
795             {
796
797             /* REACTION-FIELD ELECTROSTATICS */
798             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
799             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
800
801             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
802
803             /* Update potential sum for this i atom from the interaction with this j atom. */
804             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
805             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
806             velecsum         = _fjsp_add_v2r8(velecsum,velec);
807
808             fscal            = felec;
809
810             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
811
812             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
813
814             /* Update vectorial force */
815             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
816             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
817             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
818             
819             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
820             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
821             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
822
823             }
824
825             /**************************
826              * CALCULATE INTERACTIONS *
827              **************************/
828
829             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
830             {
831
832             /* REACTION-FIELD ELECTROSTATICS */
833             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
834             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
835
836             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
837
838             /* Update potential sum for this i atom from the interaction with this j atom. */
839             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
840             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
841             velecsum         = _fjsp_add_v2r8(velecsum,velec);
842
843             fscal            = felec;
844
845             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
846
847             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
848
849             /* Update vectorial force */
850             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
851             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
852             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
853             
854             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
855             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
856             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
857
858             }
859
860             /**************************
861              * CALCULATE INTERACTIONS *
862              **************************/
863
864             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
865             {
866
867             /* REACTION-FIELD ELECTROSTATICS */
868             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
869             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
870
871             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
872
873             /* Update potential sum for this i atom from the interaction with this j atom. */
874             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
875             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
876             velecsum         = _fjsp_add_v2r8(velecsum,velec);
877
878             fscal            = felec;
879
880             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
881
882             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
883
884             /* Update vectorial force */
885             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
886             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
887             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
888             
889             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
890             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
891             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
892
893             }
894
895             /**************************
896              * CALCULATE INTERACTIONS *
897              **************************/
898
899             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
900             {
901
902             /* REACTION-FIELD ELECTROSTATICS */
903             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
904             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
905
906             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
907
908             /* Update potential sum for this i atom from the interaction with this j atom. */
909             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
910             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
911             velecsum         = _fjsp_add_v2r8(velecsum,velec);
912
913             fscal            = felec;
914
915             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
916
917             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
918
919             /* Update vectorial force */
920             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
921             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
922             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
923             
924             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
925             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
926             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
927
928             }
929
930             /**************************
931              * CALCULATE INTERACTIONS *
932              **************************/
933
934             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
935             {
936
937             /* REACTION-FIELD ELECTROSTATICS */
938             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
939             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
940
941             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
942
943             /* Update potential sum for this i atom from the interaction with this j atom. */
944             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
945             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
946             velecsum         = _fjsp_add_v2r8(velecsum,velec);
947
948             fscal            = felec;
949
950             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
951
952             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
953
954             /* Update vectorial force */
955             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
956             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
957             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
958             
959             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
960             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
961             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
962
963             }
964
965             /**************************
966              * CALCULATE INTERACTIONS *
967              **************************/
968
969             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
970             {
971
972             /* REACTION-FIELD ELECTROSTATICS */
973             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
974             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
975
976             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
977
978             /* Update potential sum for this i atom from the interaction with this j atom. */
979             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
980             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
981             velecsum         = _fjsp_add_v2r8(velecsum,velec);
982
983             fscal            = felec;
984
985             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
986
987             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
988
989             /* Update vectorial force */
990             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
991             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
992             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
993             
994             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
995             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
996             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
997
998             }
999
1000             /**************************
1001              * CALCULATE INTERACTIONS *
1002              **************************/
1003
1004             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1005             {
1006
1007             /* REACTION-FIELD ELECTROSTATICS */
1008             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
1009             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1010
1011             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1012
1013             /* Update potential sum for this i atom from the interaction with this j atom. */
1014             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1015             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1016             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1017
1018             fscal            = felec;
1019
1020             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1021
1022             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1023
1024             /* Update vectorial force */
1025             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1026             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1027             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1028             
1029             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1030             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1031             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1032
1033             }
1034
1035             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1036
1037             /* Inner loop uses 385 flops */
1038         }
1039
1040         /* End of innermost loop */
1041
1042         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1043                                               f+i_coord_offset,fshift+i_shift_offset);
1044
1045         ggid                        = gid[iidx];
1046         /* Update potential energies */
1047         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1048         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1049
1050         /* Increment number of inner iterations */
1051         inneriter                  += j_index_end - j_index_start;
1052
1053         /* Outer loop uses 20 flops */
1054     }
1055
1056     /* Increment number of outer iterations */
1057     outeriter        += nri;
1058
1059     /* Update outer/inner flops */
1060
1061     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*385);
1062 }
1063 /*
1064  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
1065  * Electrostatics interaction: ReactionField
1066  * VdW interaction:            LennardJones
1067  * Geometry:                   Water3-Water3
1068  * Calculate force/pot:        Force
1069  */
1070 void
1071 nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
1072                     (t_nblist                    * gmx_restrict       nlist,
1073                      rvec                        * gmx_restrict          xx,
1074                      rvec                        * gmx_restrict          ff,
1075                      t_forcerec                  * gmx_restrict          fr,
1076                      t_mdatoms                   * gmx_restrict     mdatoms,
1077                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1078                      t_nrnb                      * gmx_restrict        nrnb)
1079 {
1080     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1081      * just 0 for non-waters.
1082      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1083      * jnr indices corresponding to data put in the two positions in the SIMD register.
1084      */
1085     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1086     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1087     int              jnrA,jnrB;
1088     int              j_coord_offsetA,j_coord_offsetB;
1089     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1090     real             rcutoff_scalar;
1091     real             *shiftvec,*fshift,*x,*f;
1092     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1093     int              vdwioffset0;
1094     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1095     int              vdwioffset1;
1096     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1097     int              vdwioffset2;
1098     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1099     int              vdwjidx0A,vdwjidx0B;
1100     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1101     int              vdwjidx1A,vdwjidx1B;
1102     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1103     int              vdwjidx2A,vdwjidx2B;
1104     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1105     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1106     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1107     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1108     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1109     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1110     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1111     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1112     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1113     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1114     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1115     real             *charge;
1116     int              nvdwtype;
1117     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1118     int              *vdwtype;
1119     real             *vdwparam;
1120     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1121     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1122     _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
1123     real             rswitch_scalar,d_scalar;
1124     _fjsp_v2r8       itab_tmp;
1125     _fjsp_v2r8       dummy_mask,cutoff_mask;
1126     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1127     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1128     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1129
1130     x                = xx[0];
1131     f                = ff[0];
1132
1133     nri              = nlist->nri;
1134     iinr             = nlist->iinr;
1135     jindex           = nlist->jindex;
1136     jjnr             = nlist->jjnr;
1137     shiftidx         = nlist->shift;
1138     gid              = nlist->gid;
1139     shiftvec         = fr->shift_vec[0];
1140     fshift           = fr->fshift[0];
1141     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1142     charge           = mdatoms->chargeA;
1143     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1144     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1145     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1146     nvdwtype         = fr->ntype;
1147     vdwparam         = fr->nbfp;
1148     vdwtype          = mdatoms->typeA;
1149
1150     /* Setup water-specific parameters */
1151     inr              = nlist->iinr[0];
1152     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1153     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1154     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1155     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1156
1157     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1158     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1159     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1160     vdwjidx0A        = 2*vdwtype[inr+0];
1161     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1162     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1163     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1164     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1165     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1166     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1167     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1168     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1169     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1170     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1171     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1172
1173     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1174     rcutoff_scalar   = fr->rcoulomb;
1175     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1176     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
1177
1178     rswitch_scalar   = fr->rvdw_switch;
1179     rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
1180     /* Setup switch parameters */
1181     d_scalar         = rcutoff_scalar-rswitch_scalar;
1182     d                = gmx_fjsp_set1_v2r8(d_scalar);
1183     swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
1184     swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
1185     swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
1186     swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
1187     swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
1188     swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
1189
1190     /* Avoid stupid compiler warnings */
1191     jnrA = jnrB = 0;
1192     j_coord_offsetA = 0;
1193     j_coord_offsetB = 0;
1194
1195     outeriter        = 0;
1196     inneriter        = 0;
1197
1198     /* Start outer loop over neighborlists */
1199     for(iidx=0; iidx<nri; iidx++)
1200     {
1201         /* Load shift vector for this list */
1202         i_shift_offset   = DIM*shiftidx[iidx];
1203
1204         /* Load limits for loop over neighbors */
1205         j_index_start    = jindex[iidx];
1206         j_index_end      = jindex[iidx+1];
1207
1208         /* Get outer coordinate index */
1209         inr              = iinr[iidx];
1210         i_coord_offset   = DIM*inr;
1211
1212         /* Load i particle coords and add shift vector */
1213         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1214                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1215
1216         fix0             = _fjsp_setzero_v2r8();
1217         fiy0             = _fjsp_setzero_v2r8();
1218         fiz0             = _fjsp_setzero_v2r8();
1219         fix1             = _fjsp_setzero_v2r8();
1220         fiy1             = _fjsp_setzero_v2r8();
1221         fiz1             = _fjsp_setzero_v2r8();
1222         fix2             = _fjsp_setzero_v2r8();
1223         fiy2             = _fjsp_setzero_v2r8();
1224         fiz2             = _fjsp_setzero_v2r8();
1225
1226         /* Start inner kernel loop */
1227         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1228         {
1229
1230             /* Get j neighbor index, and coordinate index */
1231             jnrA             = jjnr[jidx];
1232             jnrB             = jjnr[jidx+1];
1233             j_coord_offsetA  = DIM*jnrA;
1234             j_coord_offsetB  = DIM*jnrB;
1235
1236             /* load j atom coordinates */
1237             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1238                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1239
1240             /* Calculate displacement vector */
1241             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1242             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1243             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1244             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1245             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1246             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1247             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1248             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1249             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1250             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1251             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1252             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1253             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1254             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1255             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1256             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1257             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1258             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1259             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1260             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1261             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1262             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1263             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1264             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1265             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1266             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1267             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1268
1269             /* Calculate squared distance and things based on it */
1270             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1271             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1272             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1273             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1274             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1275             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1276             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1277             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1278             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1279
1280             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1281             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1282             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1283             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1284             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1285             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1286             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1287             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1288             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1289
1290             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1291             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1292             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1293             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1294             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1295             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1296             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1297             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1298             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1299
1300             fjx0             = _fjsp_setzero_v2r8();
1301             fjy0             = _fjsp_setzero_v2r8();
1302             fjz0             = _fjsp_setzero_v2r8();
1303             fjx1             = _fjsp_setzero_v2r8();
1304             fjy1             = _fjsp_setzero_v2r8();
1305             fjz1             = _fjsp_setzero_v2r8();
1306             fjx2             = _fjsp_setzero_v2r8();
1307             fjy2             = _fjsp_setzero_v2r8();
1308             fjz2             = _fjsp_setzero_v2r8();
1309
1310             /**************************
1311              * CALCULATE INTERACTIONS *
1312              **************************/
1313
1314             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1315             {
1316
1317             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1318
1319             /* REACTION-FIELD ELECTROSTATICS */
1320             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1321
1322             /* LENNARD-JONES DISPERSION/REPULSION */
1323
1324             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1325             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
1326             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
1327             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
1328             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
1329
1330             d                = _fjsp_sub_v2r8(r00,rswitch);
1331             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
1332             d2               = _fjsp_mul_v2r8(d,d);
1333             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
1334
1335             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
1336
1337             /* Evaluate switch function */
1338             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1339             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
1340             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1341
1342             fscal            = _fjsp_add_v2r8(felec,fvdw);
1343
1344             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1345
1346             /* Update vectorial force */
1347             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1348             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1349             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1350             
1351             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1352             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1353             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1354
1355             }
1356
1357             /**************************
1358              * CALCULATE INTERACTIONS *
1359              **************************/
1360
1361             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1362             {
1363
1364             /* REACTION-FIELD ELECTROSTATICS */
1365             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1366
1367             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1368
1369             fscal            = felec;
1370
1371             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1372
1373             /* Update vectorial force */
1374             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1375             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1376             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1377             
1378             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1379             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1380             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1381
1382             }
1383
1384             /**************************
1385              * CALCULATE INTERACTIONS *
1386              **************************/
1387
1388             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1389             {
1390
1391             /* REACTION-FIELD ELECTROSTATICS */
1392             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1393
1394             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1395
1396             fscal            = felec;
1397
1398             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1399
1400             /* Update vectorial force */
1401             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1402             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1403             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1404             
1405             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1406             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1407             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1408
1409             }
1410
1411             /**************************
1412              * CALCULATE INTERACTIONS *
1413              **************************/
1414
1415             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1416             {
1417
1418             /* REACTION-FIELD ELECTROSTATICS */
1419             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1420
1421             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1422
1423             fscal            = felec;
1424
1425             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1426
1427             /* Update vectorial force */
1428             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1429             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1430             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1431             
1432             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1433             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1434             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1435
1436             }
1437
1438             /**************************
1439              * CALCULATE INTERACTIONS *
1440              **************************/
1441
1442             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1443             {
1444
1445             /* REACTION-FIELD ELECTROSTATICS */
1446             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1447
1448             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1449
1450             fscal            = felec;
1451
1452             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1453
1454             /* Update vectorial force */
1455             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1456             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1457             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1458             
1459             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1460             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1461             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1462
1463             }
1464
1465             /**************************
1466              * CALCULATE INTERACTIONS *
1467              **************************/
1468
1469             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1470             {
1471
1472             /* REACTION-FIELD ELECTROSTATICS */
1473             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1474
1475             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1476
1477             fscal            = felec;
1478
1479             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1480
1481             /* Update vectorial force */
1482             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1483             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1484             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1485             
1486             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1487             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1488             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1489
1490             }
1491
1492             /**************************
1493              * CALCULATE INTERACTIONS *
1494              **************************/
1495
1496             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1497             {
1498
1499             /* REACTION-FIELD ELECTROSTATICS */
1500             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1501
1502             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1503
1504             fscal            = felec;
1505
1506             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1507
1508             /* Update vectorial force */
1509             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1510             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1511             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1512             
1513             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1514             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1515             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1516
1517             }
1518
1519             /**************************
1520              * CALCULATE INTERACTIONS *
1521              **************************/
1522
1523             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1524             {
1525
1526             /* REACTION-FIELD ELECTROSTATICS */
1527             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1528
1529             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1530
1531             fscal            = felec;
1532
1533             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1534
1535             /* Update vectorial force */
1536             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1537             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1538             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1539             
1540             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1541             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1542             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1543
1544             }
1545
1546             /**************************
1547              * CALCULATE INTERACTIONS *
1548              **************************/
1549
1550             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1551             {
1552
1553             /* REACTION-FIELD ELECTROSTATICS */
1554             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1555
1556             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1557
1558             fscal            = felec;
1559
1560             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1561
1562             /* Update vectorial force */
1563             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1564             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1565             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1566             
1567             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1568             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1569             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1570
1571             }
1572
1573             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1574
1575             /* Inner loop uses 328 flops */
1576         }
1577
1578         if(jidx<j_index_end)
1579         {
1580
1581             jnrA             = jjnr[jidx];
1582             j_coord_offsetA  = DIM*jnrA;
1583
1584             /* load j atom coordinates */
1585             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1586                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1587
1588             /* Calculate displacement vector */
1589             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1590             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1591             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1592             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1593             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1594             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1595             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1596             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1597             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1598             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1599             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1600             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1601             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1602             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1603             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1604             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1605             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1606             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1607             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1608             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1609             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1610             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1611             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1612             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1613             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1614             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1615             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1616
1617             /* Calculate squared distance and things based on it */
1618             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1619             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1620             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1621             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1622             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1623             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1624             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1625             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1626             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1627
1628             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1629             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1630             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1631             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1632             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1633             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1634             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1635             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1636             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1637
1638             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1639             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1640             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1641             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1642             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1643             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1644             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1645             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1646             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1647
1648             fjx0             = _fjsp_setzero_v2r8();
1649             fjy0             = _fjsp_setzero_v2r8();
1650             fjz0             = _fjsp_setzero_v2r8();
1651             fjx1             = _fjsp_setzero_v2r8();
1652             fjy1             = _fjsp_setzero_v2r8();
1653             fjz1             = _fjsp_setzero_v2r8();
1654             fjx2             = _fjsp_setzero_v2r8();
1655             fjy2             = _fjsp_setzero_v2r8();
1656             fjz2             = _fjsp_setzero_v2r8();
1657
1658             /**************************
1659              * CALCULATE INTERACTIONS *
1660              **************************/
1661
1662             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1663             {
1664
1665             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1666
1667             /* REACTION-FIELD ELECTROSTATICS */
1668             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1669
1670             /* LENNARD-JONES DISPERSION/REPULSION */
1671
1672             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1673             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
1674             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
1675             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
1676             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
1677
1678             d                = _fjsp_sub_v2r8(r00,rswitch);
1679             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
1680             d2               = _fjsp_mul_v2r8(d,d);
1681             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
1682
1683             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
1684
1685             /* Evaluate switch function */
1686             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1687             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
1688             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1689
1690             fscal            = _fjsp_add_v2r8(felec,fvdw);
1691
1692             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1693
1694             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1695
1696             /* Update vectorial force */
1697             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1698             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1699             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1700             
1701             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1702             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1703             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1704
1705             }
1706
1707             /**************************
1708              * CALCULATE INTERACTIONS *
1709              **************************/
1710
1711             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1712             {
1713
1714             /* REACTION-FIELD ELECTROSTATICS */
1715             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1716
1717             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1718
1719             fscal            = felec;
1720
1721             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1722
1723             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1724
1725             /* Update vectorial force */
1726             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1727             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1728             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1729             
1730             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1731             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1732             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1733
1734             }
1735
1736             /**************************
1737              * CALCULATE INTERACTIONS *
1738              **************************/
1739
1740             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1741             {
1742
1743             /* REACTION-FIELD ELECTROSTATICS */
1744             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1745
1746             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1747
1748             fscal            = felec;
1749
1750             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1751
1752             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1753
1754             /* Update vectorial force */
1755             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1756             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1757             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1758             
1759             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1760             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1761             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1762
1763             }
1764
1765             /**************************
1766              * CALCULATE INTERACTIONS *
1767              **************************/
1768
1769             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1770             {
1771
1772             /* REACTION-FIELD ELECTROSTATICS */
1773             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1774
1775             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1776
1777             fscal            = felec;
1778
1779             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1780
1781             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1782
1783             /* Update vectorial force */
1784             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1785             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1786             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1787             
1788             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1789             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1790             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1791
1792             }
1793
1794             /**************************
1795              * CALCULATE INTERACTIONS *
1796              **************************/
1797
1798             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1799             {
1800
1801             /* REACTION-FIELD ELECTROSTATICS */
1802             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1803
1804             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1805
1806             fscal            = felec;
1807
1808             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1809
1810             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1811
1812             /* Update vectorial force */
1813             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1814             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1815             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1816             
1817             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1818             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1819             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1820
1821             }
1822
1823             /**************************
1824              * CALCULATE INTERACTIONS *
1825              **************************/
1826
1827             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1828             {
1829
1830             /* REACTION-FIELD ELECTROSTATICS */
1831             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1832
1833             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1834
1835             fscal            = felec;
1836
1837             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1838
1839             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1840
1841             /* Update vectorial force */
1842             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1843             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1844             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1845             
1846             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1847             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1848             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1849
1850             }
1851
1852             /**************************
1853              * CALCULATE INTERACTIONS *
1854              **************************/
1855
1856             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1857             {
1858
1859             /* REACTION-FIELD ELECTROSTATICS */
1860             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1861
1862             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1863
1864             fscal            = felec;
1865
1866             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1867
1868             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1869
1870             /* Update vectorial force */
1871             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1872             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1873             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1874             
1875             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1876             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1877             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1878
1879             }
1880
1881             /**************************
1882              * CALCULATE INTERACTIONS *
1883              **************************/
1884
1885             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1886             {
1887
1888             /* REACTION-FIELD ELECTROSTATICS */
1889             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1890
1891             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1892
1893             fscal            = felec;
1894
1895             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1896
1897             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1898
1899             /* Update vectorial force */
1900             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1901             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1902             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1903             
1904             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1905             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1906             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1907
1908             }
1909
1910             /**************************
1911              * CALCULATE INTERACTIONS *
1912              **************************/
1913
1914             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1915             {
1916
1917             /* REACTION-FIELD ELECTROSTATICS */
1918             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1919
1920             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1921
1922             fscal            = felec;
1923
1924             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1925
1926             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1927
1928             /* Update vectorial force */
1929             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1930             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1931             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1932             
1933             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1934             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1935             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1936
1937             }
1938
1939             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1940
1941             /* Inner loop uses 328 flops */
1942         }
1943
1944         /* End of innermost loop */
1945
1946         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1947                                               f+i_coord_offset,fshift+i_shift_offset);
1948
1949         /* Increment number of inner iterations */
1950         inneriter                  += j_index_end - j_index_start;
1951
1952         /* Outer loop uses 18 flops */
1953     }
1954
1955     /* Increment number of outer iterations */
1956     outeriter        += nri;
1957
1958     /* Update outer/inner flops */
1959
1960     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*328);
1961 }