K-computer specific modifications
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "types/simple.h"
46 #include "gromacs/legacyheaders/vec.h"
47 #include "nrnb.h"
48
49 #include "kernelutil_sparc64_hpc_ace_double.h"
50
51 /*
52  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
53  * Electrostatics interaction: ReactionField
54  * VdW interaction:            LennardJones
55  * Geometry:                   Water3-Water3
56  * Calculate force/pot:        PotentialAndForce
57  */
58 void
59 nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
60                     (t_nblist                    * gmx_restrict       nlist,
61                      rvec                        * gmx_restrict          xx,
62                      rvec                        * gmx_restrict          ff,
63                      t_forcerec                  * gmx_restrict          fr,
64                      t_mdatoms                   * gmx_restrict     mdatoms,
65                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66                      t_nrnb                      * gmx_restrict        nrnb)
67 {
68     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69      * just 0 for non-waters.
70      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71      * jnr indices corresponding to data put in the two positions in the SIMD register.
72      */
73     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
74     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
75     int              jnrA,jnrB;
76     int              j_coord_offsetA,j_coord_offsetB;
77     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
78     real             rcutoff_scalar;
79     real             *shiftvec,*fshift,*x,*f;
80     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
81     int              vdwioffset0;
82     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
83     int              vdwioffset1;
84     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
85     int              vdwioffset2;
86     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87     int              vdwjidx0A,vdwjidx0B;
88     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89     int              vdwjidx1A,vdwjidx1B;
90     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91     int              vdwjidx2A,vdwjidx2B;
92     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
103     real             *charge;
104     int              nvdwtype;
105     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
106     int              *vdwtype;
107     real             *vdwparam;
108     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
109     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
110     _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
111     real             rswitch_scalar,d_scalar;
112     _fjsp_v2r8       itab_tmp;
113     _fjsp_v2r8       dummy_mask,cutoff_mask;
114     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
115     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
116     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
117
118     x                = xx[0];
119     f                = ff[0];
120
121     nri              = nlist->nri;
122     iinr             = nlist->iinr;
123     jindex           = nlist->jindex;
124     jjnr             = nlist->jjnr;
125     shiftidx         = nlist->shift;
126     gid              = nlist->gid;
127     shiftvec         = fr->shift_vec[0];
128     fshift           = fr->fshift[0];
129     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
130     charge           = mdatoms->chargeA;
131     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
132     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
133     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
134     nvdwtype         = fr->ntype;
135     vdwparam         = fr->nbfp;
136     vdwtype          = mdatoms->typeA;
137
138     /* Setup water-specific parameters */
139     inr              = nlist->iinr[0];
140     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
141     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
142     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
143     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
144
145     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
146     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
147     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
148     vdwjidx0A        = 2*vdwtype[inr+0];
149     qq00             = _fjsp_mul_v2r8(iq0,jq0);
150     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
151     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
152     qq01             = _fjsp_mul_v2r8(iq0,jq1);
153     qq02             = _fjsp_mul_v2r8(iq0,jq2);
154     qq10             = _fjsp_mul_v2r8(iq1,jq0);
155     qq11             = _fjsp_mul_v2r8(iq1,jq1);
156     qq12             = _fjsp_mul_v2r8(iq1,jq2);
157     qq20             = _fjsp_mul_v2r8(iq2,jq0);
158     qq21             = _fjsp_mul_v2r8(iq2,jq1);
159     qq22             = _fjsp_mul_v2r8(iq2,jq2);
160
161     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
162     rcutoff_scalar   = fr->rcoulomb;
163     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
164     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
165
166     rswitch_scalar   = fr->rvdw_switch;
167     rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
168     /* Setup switch parameters */
169     d_scalar         = rcutoff_scalar-rswitch_scalar;
170     d                = gmx_fjsp_set1_v2r8(d_scalar);
171     swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
172     swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
173     swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
174     swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
175     swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
176     swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
177
178     /* Avoid stupid compiler warnings */
179     jnrA = jnrB = 0;
180     j_coord_offsetA = 0;
181     j_coord_offsetB = 0;
182
183     outeriter        = 0;
184     inneriter        = 0;
185
186     /* Start outer loop over neighborlists */
187     for(iidx=0; iidx<nri; iidx++)
188     {
189         /* Load shift vector for this list */
190         i_shift_offset   = DIM*shiftidx[iidx];
191
192         /* Load limits for loop over neighbors */
193         j_index_start    = jindex[iidx];
194         j_index_end      = jindex[iidx+1];
195
196         /* Get outer coordinate index */
197         inr              = iinr[iidx];
198         i_coord_offset   = DIM*inr;
199
200         /* Load i particle coords and add shift vector */
201         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
202                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
203
204         fix0             = _fjsp_setzero_v2r8();
205         fiy0             = _fjsp_setzero_v2r8();
206         fiz0             = _fjsp_setzero_v2r8();
207         fix1             = _fjsp_setzero_v2r8();
208         fiy1             = _fjsp_setzero_v2r8();
209         fiz1             = _fjsp_setzero_v2r8();
210         fix2             = _fjsp_setzero_v2r8();
211         fiy2             = _fjsp_setzero_v2r8();
212         fiz2             = _fjsp_setzero_v2r8();
213
214         /* Reset potential sums */
215         velecsum         = _fjsp_setzero_v2r8();
216         vvdwsum          = _fjsp_setzero_v2r8();
217
218         /* Start inner kernel loop */
219         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
220         {
221
222             /* Get j neighbor index, and coordinate index */
223             jnrA             = jjnr[jidx];
224             jnrB             = jjnr[jidx+1];
225             j_coord_offsetA  = DIM*jnrA;
226             j_coord_offsetB  = DIM*jnrB;
227
228             /* load j atom coordinates */
229             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
230                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
231
232             /* Calculate displacement vector */
233             dx00             = _fjsp_sub_v2r8(ix0,jx0);
234             dy00             = _fjsp_sub_v2r8(iy0,jy0);
235             dz00             = _fjsp_sub_v2r8(iz0,jz0);
236             dx01             = _fjsp_sub_v2r8(ix0,jx1);
237             dy01             = _fjsp_sub_v2r8(iy0,jy1);
238             dz01             = _fjsp_sub_v2r8(iz0,jz1);
239             dx02             = _fjsp_sub_v2r8(ix0,jx2);
240             dy02             = _fjsp_sub_v2r8(iy0,jy2);
241             dz02             = _fjsp_sub_v2r8(iz0,jz2);
242             dx10             = _fjsp_sub_v2r8(ix1,jx0);
243             dy10             = _fjsp_sub_v2r8(iy1,jy0);
244             dz10             = _fjsp_sub_v2r8(iz1,jz0);
245             dx11             = _fjsp_sub_v2r8(ix1,jx1);
246             dy11             = _fjsp_sub_v2r8(iy1,jy1);
247             dz11             = _fjsp_sub_v2r8(iz1,jz1);
248             dx12             = _fjsp_sub_v2r8(ix1,jx2);
249             dy12             = _fjsp_sub_v2r8(iy1,jy2);
250             dz12             = _fjsp_sub_v2r8(iz1,jz2);
251             dx20             = _fjsp_sub_v2r8(ix2,jx0);
252             dy20             = _fjsp_sub_v2r8(iy2,jy0);
253             dz20             = _fjsp_sub_v2r8(iz2,jz0);
254             dx21             = _fjsp_sub_v2r8(ix2,jx1);
255             dy21             = _fjsp_sub_v2r8(iy2,jy1);
256             dz21             = _fjsp_sub_v2r8(iz2,jz1);
257             dx22             = _fjsp_sub_v2r8(ix2,jx2);
258             dy22             = _fjsp_sub_v2r8(iy2,jy2);
259             dz22             = _fjsp_sub_v2r8(iz2,jz2);
260
261             /* Calculate squared distance and things based on it */
262             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
263             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
264             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
265             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
266             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
267             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
268             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
269             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
270             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
271
272             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
273             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
274             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
275             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
276             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
277             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
278             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
279             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
280             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
281
282             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
283             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
284             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
285             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
286             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
287             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
288             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
289             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
290             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
291
292             fjx0             = _fjsp_setzero_v2r8();
293             fjy0             = _fjsp_setzero_v2r8();
294             fjz0             = _fjsp_setzero_v2r8();
295             fjx1             = _fjsp_setzero_v2r8();
296             fjy1             = _fjsp_setzero_v2r8();
297             fjz1             = _fjsp_setzero_v2r8();
298             fjx2             = _fjsp_setzero_v2r8();
299             fjy2             = _fjsp_setzero_v2r8();
300             fjz2             = _fjsp_setzero_v2r8();
301
302             /**************************
303              * CALCULATE INTERACTIONS *
304              **************************/
305
306             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
307             {
308
309             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
310
311             /* REACTION-FIELD ELECTROSTATICS */
312             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
313             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
314
315             /* LENNARD-JONES DISPERSION/REPULSION */
316
317             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
318             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
319             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
320             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
321             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
322
323             d                = _fjsp_sub_v2r8(r00,rswitch);
324             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
325             d2               = _fjsp_mul_v2r8(d,d);
326             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
327
328             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
329
330             /* Evaluate switch function */
331             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
332             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
333             vvdw             = _fjsp_mul_v2r8(vvdw,sw);
334             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
335
336             /* Update potential sum for this i atom from the interaction with this j atom. */
337             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
338             velecsum         = _fjsp_add_v2r8(velecsum,velec);
339             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
340             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
341
342             fscal            = _fjsp_add_v2r8(felec,fvdw);
343
344             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
345
346             /* Update vectorial force */
347             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
348             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
349             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
350             
351             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
352             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
353             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
354
355             }
356
357             /**************************
358              * CALCULATE INTERACTIONS *
359              **************************/
360
361             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
362             {
363
364             /* REACTION-FIELD ELECTROSTATICS */
365             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
366             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
367
368             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
369
370             /* Update potential sum for this i atom from the interaction with this j atom. */
371             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
372             velecsum         = _fjsp_add_v2r8(velecsum,velec);
373
374             fscal            = felec;
375
376             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
377
378             /* Update vectorial force */
379             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
380             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
381             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
382             
383             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
384             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
385             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
386
387             }
388
389             /**************************
390              * CALCULATE INTERACTIONS *
391              **************************/
392
393             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
394             {
395
396             /* REACTION-FIELD ELECTROSTATICS */
397             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
398             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
399
400             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
401
402             /* Update potential sum for this i atom from the interaction with this j atom. */
403             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
404             velecsum         = _fjsp_add_v2r8(velecsum,velec);
405
406             fscal            = felec;
407
408             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
409
410             /* Update vectorial force */
411             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
412             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
413             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
414             
415             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
416             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
417             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
418
419             }
420
421             /**************************
422              * CALCULATE INTERACTIONS *
423              **************************/
424
425             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
426             {
427
428             /* REACTION-FIELD ELECTROSTATICS */
429             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
430             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
431
432             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
433
434             /* Update potential sum for this i atom from the interaction with this j atom. */
435             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
436             velecsum         = _fjsp_add_v2r8(velecsum,velec);
437
438             fscal            = felec;
439
440             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
441
442             /* Update vectorial force */
443             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
444             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
445             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
446             
447             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
448             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
449             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
450
451             }
452
453             /**************************
454              * CALCULATE INTERACTIONS *
455              **************************/
456
457             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
458             {
459
460             /* REACTION-FIELD ELECTROSTATICS */
461             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
462             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
463
464             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
465
466             /* Update potential sum for this i atom from the interaction with this j atom. */
467             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
468             velecsum         = _fjsp_add_v2r8(velecsum,velec);
469
470             fscal            = felec;
471
472             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
473
474             /* Update vectorial force */
475             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
476             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
477             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
478             
479             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
480             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
481             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
482
483             }
484
485             /**************************
486              * CALCULATE INTERACTIONS *
487              **************************/
488
489             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
490             {
491
492             /* REACTION-FIELD ELECTROSTATICS */
493             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
494             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
495
496             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
497
498             /* Update potential sum for this i atom from the interaction with this j atom. */
499             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
500             velecsum         = _fjsp_add_v2r8(velecsum,velec);
501
502             fscal            = felec;
503
504             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
505
506             /* Update vectorial force */
507             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
508             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
509             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
510             
511             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
512             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
513             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
514
515             }
516
517             /**************************
518              * CALCULATE INTERACTIONS *
519              **************************/
520
521             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
522             {
523
524             /* REACTION-FIELD ELECTROSTATICS */
525             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
526             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
527
528             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
529
530             /* Update potential sum for this i atom from the interaction with this j atom. */
531             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
532             velecsum         = _fjsp_add_v2r8(velecsum,velec);
533
534             fscal            = felec;
535
536             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
537
538             /* Update vectorial force */
539             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
540             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
541             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
542             
543             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
544             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
545             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
546
547             }
548
549             /**************************
550              * CALCULATE INTERACTIONS *
551              **************************/
552
553             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
554             {
555
556             /* REACTION-FIELD ELECTROSTATICS */
557             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
558             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
559
560             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
561
562             /* Update potential sum for this i atom from the interaction with this j atom. */
563             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
564             velecsum         = _fjsp_add_v2r8(velecsum,velec);
565
566             fscal            = felec;
567
568             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
569
570             /* Update vectorial force */
571             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
572             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
573             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
574             
575             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
576             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
577             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
578
579             }
580
581             /**************************
582              * CALCULATE INTERACTIONS *
583              **************************/
584
585             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
586             {
587
588             /* REACTION-FIELD ELECTROSTATICS */
589             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
590             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
591
592             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
593
594             /* Update potential sum for this i atom from the interaction with this j atom. */
595             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
596             velecsum         = _fjsp_add_v2r8(velecsum,velec);
597
598             fscal            = felec;
599
600             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
601
602             /* Update vectorial force */
603             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
604             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
605             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
606             
607             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
608             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
609             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
610
611             }
612
613             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
614
615             /* Inner loop uses 385 flops */
616         }
617
618         if(jidx<j_index_end)
619         {
620
621             jnrA             = jjnr[jidx];
622             j_coord_offsetA  = DIM*jnrA;
623
624             /* load j atom coordinates */
625             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
626                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
627
628             /* Calculate displacement vector */
629             dx00             = _fjsp_sub_v2r8(ix0,jx0);
630             dy00             = _fjsp_sub_v2r8(iy0,jy0);
631             dz00             = _fjsp_sub_v2r8(iz0,jz0);
632             dx01             = _fjsp_sub_v2r8(ix0,jx1);
633             dy01             = _fjsp_sub_v2r8(iy0,jy1);
634             dz01             = _fjsp_sub_v2r8(iz0,jz1);
635             dx02             = _fjsp_sub_v2r8(ix0,jx2);
636             dy02             = _fjsp_sub_v2r8(iy0,jy2);
637             dz02             = _fjsp_sub_v2r8(iz0,jz2);
638             dx10             = _fjsp_sub_v2r8(ix1,jx0);
639             dy10             = _fjsp_sub_v2r8(iy1,jy0);
640             dz10             = _fjsp_sub_v2r8(iz1,jz0);
641             dx11             = _fjsp_sub_v2r8(ix1,jx1);
642             dy11             = _fjsp_sub_v2r8(iy1,jy1);
643             dz11             = _fjsp_sub_v2r8(iz1,jz1);
644             dx12             = _fjsp_sub_v2r8(ix1,jx2);
645             dy12             = _fjsp_sub_v2r8(iy1,jy2);
646             dz12             = _fjsp_sub_v2r8(iz1,jz2);
647             dx20             = _fjsp_sub_v2r8(ix2,jx0);
648             dy20             = _fjsp_sub_v2r8(iy2,jy0);
649             dz20             = _fjsp_sub_v2r8(iz2,jz0);
650             dx21             = _fjsp_sub_v2r8(ix2,jx1);
651             dy21             = _fjsp_sub_v2r8(iy2,jy1);
652             dz21             = _fjsp_sub_v2r8(iz2,jz1);
653             dx22             = _fjsp_sub_v2r8(ix2,jx2);
654             dy22             = _fjsp_sub_v2r8(iy2,jy2);
655             dz22             = _fjsp_sub_v2r8(iz2,jz2);
656
657             /* Calculate squared distance and things based on it */
658             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
659             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
660             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
661             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
662             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
663             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
664             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
665             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
666             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
667
668             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
669             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
670             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
671             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
672             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
673             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
674             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
675             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
676             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
677
678             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
679             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
680             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
681             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
682             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
683             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
684             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
685             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
686             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
687
688             fjx0             = _fjsp_setzero_v2r8();
689             fjy0             = _fjsp_setzero_v2r8();
690             fjz0             = _fjsp_setzero_v2r8();
691             fjx1             = _fjsp_setzero_v2r8();
692             fjy1             = _fjsp_setzero_v2r8();
693             fjz1             = _fjsp_setzero_v2r8();
694             fjx2             = _fjsp_setzero_v2r8();
695             fjy2             = _fjsp_setzero_v2r8();
696             fjz2             = _fjsp_setzero_v2r8();
697
698             /**************************
699              * CALCULATE INTERACTIONS *
700              **************************/
701
702             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
703             {
704
705             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
706
707             /* REACTION-FIELD ELECTROSTATICS */
708             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
709             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
710
711             /* LENNARD-JONES DISPERSION/REPULSION */
712
713             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
714             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
715             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
716             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
717             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
718
719             d                = _fjsp_sub_v2r8(r00,rswitch);
720             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
721             d2               = _fjsp_mul_v2r8(d,d);
722             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
723
724             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
725
726             /* Evaluate switch function */
727             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
728             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
729             vvdw             = _fjsp_mul_v2r8(vvdw,sw);
730             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
731
732             /* Update potential sum for this i atom from the interaction with this j atom. */
733             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
734             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
735             velecsum         = _fjsp_add_v2r8(velecsum,velec);
736             vvdw             = _fjsp_and_v2r8(vvdw,cutoff_mask);
737             vvdw             = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
738             vvdwsum          = _fjsp_add_v2r8(vvdwsum,vvdw);
739
740             fscal            = _fjsp_add_v2r8(felec,fvdw);
741
742             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
743
744             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
745
746             /* Update vectorial force */
747             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
748             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
749             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
750             
751             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
752             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
753             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
754
755             }
756
757             /**************************
758              * CALCULATE INTERACTIONS *
759              **************************/
760
761             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
762             {
763
764             /* REACTION-FIELD ELECTROSTATICS */
765             velec            = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
766             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
767
768             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
769
770             /* Update potential sum for this i atom from the interaction with this j atom. */
771             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
772             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
773             velecsum         = _fjsp_add_v2r8(velecsum,velec);
774
775             fscal            = felec;
776
777             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
778
779             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
780
781             /* Update vectorial force */
782             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
783             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
784             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
785             
786             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
787             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
788             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
789
790             }
791
792             /**************************
793              * CALCULATE INTERACTIONS *
794              **************************/
795
796             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
797             {
798
799             /* REACTION-FIELD ELECTROSTATICS */
800             velec            = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
801             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
802
803             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
804
805             /* Update potential sum for this i atom from the interaction with this j atom. */
806             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
807             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
808             velecsum         = _fjsp_add_v2r8(velecsum,velec);
809
810             fscal            = felec;
811
812             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
813
814             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
815
816             /* Update vectorial force */
817             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
818             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
819             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
820             
821             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
822             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
823             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
824
825             }
826
827             /**************************
828              * CALCULATE INTERACTIONS *
829              **************************/
830
831             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
832             {
833
834             /* REACTION-FIELD ELECTROSTATICS */
835             velec            = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
836             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
837
838             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
839
840             /* Update potential sum for this i atom from the interaction with this j atom. */
841             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
842             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
843             velecsum         = _fjsp_add_v2r8(velecsum,velec);
844
845             fscal            = felec;
846
847             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
848
849             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
850
851             /* Update vectorial force */
852             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
853             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
854             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
855             
856             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
857             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
858             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
859
860             }
861
862             /**************************
863              * CALCULATE INTERACTIONS *
864              **************************/
865
866             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
867             {
868
869             /* REACTION-FIELD ELECTROSTATICS */
870             velec            = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
871             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
872
873             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
874
875             /* Update potential sum for this i atom from the interaction with this j atom. */
876             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
877             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
878             velecsum         = _fjsp_add_v2r8(velecsum,velec);
879
880             fscal            = felec;
881
882             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
883
884             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
885
886             /* Update vectorial force */
887             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
888             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
889             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
890             
891             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
892             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
893             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
894
895             }
896
897             /**************************
898              * CALCULATE INTERACTIONS *
899              **************************/
900
901             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
902             {
903
904             /* REACTION-FIELD ELECTROSTATICS */
905             velec            = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
906             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
907
908             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
909
910             /* Update potential sum for this i atom from the interaction with this j atom. */
911             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
912             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
913             velecsum         = _fjsp_add_v2r8(velecsum,velec);
914
915             fscal            = felec;
916
917             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
918
919             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
920
921             /* Update vectorial force */
922             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
923             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
924             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
925             
926             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
927             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
928             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
929
930             }
931
932             /**************************
933              * CALCULATE INTERACTIONS *
934              **************************/
935
936             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
937             {
938
939             /* REACTION-FIELD ELECTROSTATICS */
940             velec            = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
941             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
942
943             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
944
945             /* Update potential sum for this i atom from the interaction with this j atom. */
946             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
947             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
948             velecsum         = _fjsp_add_v2r8(velecsum,velec);
949
950             fscal            = felec;
951
952             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
953
954             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
955
956             /* Update vectorial force */
957             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
958             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
959             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
960             
961             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
962             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
963             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
964
965             }
966
967             /**************************
968              * CALCULATE INTERACTIONS *
969              **************************/
970
971             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
972             {
973
974             /* REACTION-FIELD ELECTROSTATICS */
975             velec            = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
976             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
977
978             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
979
980             /* Update potential sum for this i atom from the interaction with this j atom. */
981             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
982             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
983             velecsum         = _fjsp_add_v2r8(velecsum,velec);
984
985             fscal            = felec;
986
987             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
988
989             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
990
991             /* Update vectorial force */
992             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
993             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
994             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
995             
996             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
997             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
998             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
999
1000             }
1001
1002             /**************************
1003              * CALCULATE INTERACTIONS *
1004              **************************/
1005
1006             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1007             {
1008
1009             /* REACTION-FIELD ELECTROSTATICS */
1010             velec            = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
1011             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1012
1013             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1014
1015             /* Update potential sum for this i atom from the interaction with this j atom. */
1016             velec            = _fjsp_and_v2r8(velec,cutoff_mask);
1017             velec            = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1018             velecsum         = _fjsp_add_v2r8(velecsum,velec);
1019
1020             fscal            = felec;
1021
1022             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1023
1024             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1025
1026             /* Update vectorial force */
1027             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1028             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1029             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1030             
1031             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1032             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1033             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1034
1035             }
1036
1037             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1038
1039             /* Inner loop uses 385 flops */
1040         }
1041
1042         /* End of innermost loop */
1043
1044         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1045                                               f+i_coord_offset,fshift+i_shift_offset);
1046
1047         ggid                        = gid[iidx];
1048         /* Update potential energies */
1049         gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1050         gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1051
1052         /* Increment number of inner iterations */
1053         inneriter                  += j_index_end - j_index_start;
1054
1055         /* Outer loop uses 20 flops */
1056     }
1057
1058     /* Increment number of outer iterations */
1059     outeriter        += nri;
1060
1061     /* Update outer/inner flops */
1062
1063     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*385);
1064 }
1065 /*
1066  * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
1067  * Electrostatics interaction: ReactionField
1068  * VdW interaction:            LennardJones
1069  * Geometry:                   Water3-Water3
1070  * Calculate force/pot:        Force
1071  */
1072 void
1073 nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
1074                     (t_nblist                    * gmx_restrict       nlist,
1075                      rvec                        * gmx_restrict          xx,
1076                      rvec                        * gmx_restrict          ff,
1077                      t_forcerec                  * gmx_restrict          fr,
1078                      t_mdatoms                   * gmx_restrict     mdatoms,
1079                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1080                      t_nrnb                      * gmx_restrict        nrnb)
1081 {
1082     /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1083      * just 0 for non-waters.
1084      * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1085      * jnr indices corresponding to data put in the two positions in the SIMD register.
1086      */
1087     int              i_shift_offset,i_coord_offset,outeriter,inneriter;
1088     int              j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1089     int              jnrA,jnrB;
1090     int              j_coord_offsetA,j_coord_offsetB;
1091     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
1092     real             rcutoff_scalar;
1093     real             *shiftvec,*fshift,*x,*f;
1094     _fjsp_v2r8       tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1095     int              vdwioffset0;
1096     _fjsp_v2r8       ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1097     int              vdwioffset1;
1098     _fjsp_v2r8       ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1099     int              vdwioffset2;
1100     _fjsp_v2r8       ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1101     int              vdwjidx0A,vdwjidx0B;
1102     _fjsp_v2r8       jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1103     int              vdwjidx1A,vdwjidx1B;
1104     _fjsp_v2r8       jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1105     int              vdwjidx2A,vdwjidx2B;
1106     _fjsp_v2r8       jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1107     _fjsp_v2r8       dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1108     _fjsp_v2r8       dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1109     _fjsp_v2r8       dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1110     _fjsp_v2r8       dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1111     _fjsp_v2r8       dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1112     _fjsp_v2r8       dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1113     _fjsp_v2r8       dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1114     _fjsp_v2r8       dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1115     _fjsp_v2r8       dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1116     _fjsp_v2r8       velec,felec,velecsum,facel,crf,krf,krf2;
1117     real             *charge;
1118     int              nvdwtype;
1119     _fjsp_v2r8       rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1120     int              *vdwtype;
1121     real             *vdwparam;
1122     _fjsp_v2r8       one_sixth   = gmx_fjsp_set1_v2r8(1.0/6.0);
1123     _fjsp_v2r8       one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1124     _fjsp_v2r8       rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
1125     real             rswitch_scalar,d_scalar;
1126     _fjsp_v2r8       itab_tmp;
1127     _fjsp_v2r8       dummy_mask,cutoff_mask;
1128     _fjsp_v2r8       one     = gmx_fjsp_set1_v2r8(1.0);
1129     _fjsp_v2r8       two     = gmx_fjsp_set1_v2r8(2.0);
1130     union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1131
1132     x                = xx[0];
1133     f                = ff[0];
1134
1135     nri              = nlist->nri;
1136     iinr             = nlist->iinr;
1137     jindex           = nlist->jindex;
1138     jjnr             = nlist->jjnr;
1139     shiftidx         = nlist->shift;
1140     gid              = nlist->gid;
1141     shiftvec         = fr->shift_vec[0];
1142     fshift           = fr->fshift[0];
1143     facel            = gmx_fjsp_set1_v2r8(fr->epsfac);
1144     charge           = mdatoms->chargeA;
1145     krf              = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1146     krf2             = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1147     crf              = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1148     nvdwtype         = fr->ntype;
1149     vdwparam         = fr->nbfp;
1150     vdwtype          = mdatoms->typeA;
1151
1152     /* Setup water-specific parameters */
1153     inr              = nlist->iinr[0];
1154     iq0              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1155     iq1              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1156     iq2              = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1157     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
1158
1159     jq0              = gmx_fjsp_set1_v2r8(charge[inr+0]);
1160     jq1              = gmx_fjsp_set1_v2r8(charge[inr+1]);
1161     jq2              = gmx_fjsp_set1_v2r8(charge[inr+2]);
1162     vdwjidx0A        = 2*vdwtype[inr+0];
1163     qq00             = _fjsp_mul_v2r8(iq0,jq0);
1164     c6_00            = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1165     c12_00           = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1166     qq01             = _fjsp_mul_v2r8(iq0,jq1);
1167     qq02             = _fjsp_mul_v2r8(iq0,jq2);
1168     qq10             = _fjsp_mul_v2r8(iq1,jq0);
1169     qq11             = _fjsp_mul_v2r8(iq1,jq1);
1170     qq12             = _fjsp_mul_v2r8(iq1,jq2);
1171     qq20             = _fjsp_mul_v2r8(iq2,jq0);
1172     qq21             = _fjsp_mul_v2r8(iq2,jq1);
1173     qq22             = _fjsp_mul_v2r8(iq2,jq2);
1174
1175     /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1176     rcutoff_scalar   = fr->rcoulomb;
1177     rcutoff          = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1178     rcutoff2         = _fjsp_mul_v2r8(rcutoff,rcutoff);
1179
1180     rswitch_scalar   = fr->rvdw_switch;
1181     rswitch          = gmx_fjsp_set1_v2r8(rswitch_scalar);
1182     /* Setup switch parameters */
1183     d_scalar         = rcutoff_scalar-rswitch_scalar;
1184     d                = gmx_fjsp_set1_v2r8(d_scalar);
1185     swV3             = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
1186     swV4             = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
1187     swV5             = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
1188     swF2             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
1189     swF3             = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
1190     swF4             = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
1191
1192     /* Avoid stupid compiler warnings */
1193     jnrA = jnrB = 0;
1194     j_coord_offsetA = 0;
1195     j_coord_offsetB = 0;
1196
1197     outeriter        = 0;
1198     inneriter        = 0;
1199
1200     /* Start outer loop over neighborlists */
1201     for(iidx=0; iidx<nri; iidx++)
1202     {
1203         /* Load shift vector for this list */
1204         i_shift_offset   = DIM*shiftidx[iidx];
1205
1206         /* Load limits for loop over neighbors */
1207         j_index_start    = jindex[iidx];
1208         j_index_end      = jindex[iidx+1];
1209
1210         /* Get outer coordinate index */
1211         inr              = iinr[iidx];
1212         i_coord_offset   = DIM*inr;
1213
1214         /* Load i particle coords and add shift vector */
1215         gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1216                                                  &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1217
1218         fix0             = _fjsp_setzero_v2r8();
1219         fiy0             = _fjsp_setzero_v2r8();
1220         fiz0             = _fjsp_setzero_v2r8();
1221         fix1             = _fjsp_setzero_v2r8();
1222         fiy1             = _fjsp_setzero_v2r8();
1223         fiz1             = _fjsp_setzero_v2r8();
1224         fix2             = _fjsp_setzero_v2r8();
1225         fiy2             = _fjsp_setzero_v2r8();
1226         fiz2             = _fjsp_setzero_v2r8();
1227
1228         /* Start inner kernel loop */
1229         for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1230         {
1231
1232             /* Get j neighbor index, and coordinate index */
1233             jnrA             = jjnr[jidx];
1234             jnrB             = jjnr[jidx+1];
1235             j_coord_offsetA  = DIM*jnrA;
1236             j_coord_offsetB  = DIM*jnrB;
1237
1238             /* load j atom coordinates */
1239             gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1240                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1241
1242             /* Calculate displacement vector */
1243             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1244             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1245             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1246             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1247             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1248             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1249             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1250             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1251             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1252             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1253             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1254             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1255             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1256             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1257             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1258             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1259             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1260             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1261             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1262             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1263             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1264             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1265             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1266             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1267             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1268             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1269             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1270
1271             /* Calculate squared distance and things based on it */
1272             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1273             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1274             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1275             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1276             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1277             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1278             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1279             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1280             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1281
1282             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1283             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1284             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1285             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1286             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1287             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1288             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1289             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1290             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1291
1292             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1293             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1294             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1295             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1296             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1297             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1298             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1299             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1300             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1301
1302             fjx0             = _fjsp_setzero_v2r8();
1303             fjy0             = _fjsp_setzero_v2r8();
1304             fjz0             = _fjsp_setzero_v2r8();
1305             fjx1             = _fjsp_setzero_v2r8();
1306             fjy1             = _fjsp_setzero_v2r8();
1307             fjz1             = _fjsp_setzero_v2r8();
1308             fjx2             = _fjsp_setzero_v2r8();
1309             fjy2             = _fjsp_setzero_v2r8();
1310             fjz2             = _fjsp_setzero_v2r8();
1311
1312             /**************************
1313              * CALCULATE INTERACTIONS *
1314              **************************/
1315
1316             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1317             {
1318
1319             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1320
1321             /* REACTION-FIELD ELECTROSTATICS */
1322             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1323
1324             /* LENNARD-JONES DISPERSION/REPULSION */
1325
1326             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1327             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
1328             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
1329             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
1330             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
1331
1332             d                = _fjsp_sub_v2r8(r00,rswitch);
1333             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
1334             d2               = _fjsp_mul_v2r8(d,d);
1335             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
1336
1337             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
1338
1339             /* Evaluate switch function */
1340             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1341             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
1342             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1343
1344             fscal            = _fjsp_add_v2r8(felec,fvdw);
1345
1346             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1347
1348             /* Update vectorial force */
1349             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1350             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1351             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1352             
1353             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1354             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1355             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1356
1357             }
1358
1359             /**************************
1360              * CALCULATE INTERACTIONS *
1361              **************************/
1362
1363             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1364             {
1365
1366             /* REACTION-FIELD ELECTROSTATICS */
1367             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1368
1369             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1370
1371             fscal            = felec;
1372
1373             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1374
1375             /* Update vectorial force */
1376             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1377             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1378             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1379             
1380             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1381             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1382             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1383
1384             }
1385
1386             /**************************
1387              * CALCULATE INTERACTIONS *
1388              **************************/
1389
1390             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1391             {
1392
1393             /* REACTION-FIELD ELECTROSTATICS */
1394             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1395
1396             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1397
1398             fscal            = felec;
1399
1400             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1401
1402             /* Update vectorial force */
1403             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1404             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1405             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1406             
1407             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1408             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1409             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1410
1411             }
1412
1413             /**************************
1414              * CALCULATE INTERACTIONS *
1415              **************************/
1416
1417             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1418             {
1419
1420             /* REACTION-FIELD ELECTROSTATICS */
1421             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1422
1423             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1424
1425             fscal            = felec;
1426
1427             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1428
1429             /* Update vectorial force */
1430             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1431             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1432             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1433             
1434             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1435             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1436             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1437
1438             }
1439
1440             /**************************
1441              * CALCULATE INTERACTIONS *
1442              **************************/
1443
1444             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1445             {
1446
1447             /* REACTION-FIELD ELECTROSTATICS */
1448             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1449
1450             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1451
1452             fscal            = felec;
1453
1454             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1455
1456             /* Update vectorial force */
1457             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1458             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1459             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1460             
1461             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1462             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1463             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1464
1465             }
1466
1467             /**************************
1468              * CALCULATE INTERACTIONS *
1469              **************************/
1470
1471             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1472             {
1473
1474             /* REACTION-FIELD ELECTROSTATICS */
1475             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1476
1477             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1478
1479             fscal            = felec;
1480
1481             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1482
1483             /* Update vectorial force */
1484             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1485             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1486             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1487             
1488             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1489             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1490             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1491
1492             }
1493
1494             /**************************
1495              * CALCULATE INTERACTIONS *
1496              **************************/
1497
1498             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1499             {
1500
1501             /* REACTION-FIELD ELECTROSTATICS */
1502             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1503
1504             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1505
1506             fscal            = felec;
1507
1508             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1509
1510             /* Update vectorial force */
1511             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1512             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1513             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1514             
1515             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1516             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1517             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1518
1519             }
1520
1521             /**************************
1522              * CALCULATE INTERACTIONS *
1523              **************************/
1524
1525             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1526             {
1527
1528             /* REACTION-FIELD ELECTROSTATICS */
1529             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1530
1531             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1532
1533             fscal            = felec;
1534
1535             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1536
1537             /* Update vectorial force */
1538             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1539             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1540             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1541             
1542             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1543             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1544             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1545
1546             }
1547
1548             /**************************
1549              * CALCULATE INTERACTIONS *
1550              **************************/
1551
1552             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1553             {
1554
1555             /* REACTION-FIELD ELECTROSTATICS */
1556             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1557
1558             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1559
1560             fscal            = felec;
1561
1562             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1563
1564             /* Update vectorial force */
1565             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1566             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1567             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1568             
1569             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1570             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1571             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1572
1573             }
1574
1575             gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1576
1577             /* Inner loop uses 328 flops */
1578         }
1579
1580         if(jidx<j_index_end)
1581         {
1582
1583             jnrA             = jjnr[jidx];
1584             j_coord_offsetA  = DIM*jnrA;
1585
1586             /* load j atom coordinates */
1587             gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1588                                               &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1589
1590             /* Calculate displacement vector */
1591             dx00             = _fjsp_sub_v2r8(ix0,jx0);
1592             dy00             = _fjsp_sub_v2r8(iy0,jy0);
1593             dz00             = _fjsp_sub_v2r8(iz0,jz0);
1594             dx01             = _fjsp_sub_v2r8(ix0,jx1);
1595             dy01             = _fjsp_sub_v2r8(iy0,jy1);
1596             dz01             = _fjsp_sub_v2r8(iz0,jz1);
1597             dx02             = _fjsp_sub_v2r8(ix0,jx2);
1598             dy02             = _fjsp_sub_v2r8(iy0,jy2);
1599             dz02             = _fjsp_sub_v2r8(iz0,jz2);
1600             dx10             = _fjsp_sub_v2r8(ix1,jx0);
1601             dy10             = _fjsp_sub_v2r8(iy1,jy0);
1602             dz10             = _fjsp_sub_v2r8(iz1,jz0);
1603             dx11             = _fjsp_sub_v2r8(ix1,jx1);
1604             dy11             = _fjsp_sub_v2r8(iy1,jy1);
1605             dz11             = _fjsp_sub_v2r8(iz1,jz1);
1606             dx12             = _fjsp_sub_v2r8(ix1,jx2);
1607             dy12             = _fjsp_sub_v2r8(iy1,jy2);
1608             dz12             = _fjsp_sub_v2r8(iz1,jz2);
1609             dx20             = _fjsp_sub_v2r8(ix2,jx0);
1610             dy20             = _fjsp_sub_v2r8(iy2,jy0);
1611             dz20             = _fjsp_sub_v2r8(iz2,jz0);
1612             dx21             = _fjsp_sub_v2r8(ix2,jx1);
1613             dy21             = _fjsp_sub_v2r8(iy2,jy1);
1614             dz21             = _fjsp_sub_v2r8(iz2,jz1);
1615             dx22             = _fjsp_sub_v2r8(ix2,jx2);
1616             dy22             = _fjsp_sub_v2r8(iy2,jy2);
1617             dz22             = _fjsp_sub_v2r8(iz2,jz2);
1618
1619             /* Calculate squared distance and things based on it */
1620             rsq00            = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1621             rsq01            = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1622             rsq02            = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1623             rsq10            = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1624             rsq11            = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1625             rsq12            = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1626             rsq20            = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1627             rsq21            = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1628             rsq22            = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1629
1630             rinv00           = gmx_fjsp_invsqrt_v2r8(rsq00);
1631             rinv01           = gmx_fjsp_invsqrt_v2r8(rsq01);
1632             rinv02           = gmx_fjsp_invsqrt_v2r8(rsq02);
1633             rinv10           = gmx_fjsp_invsqrt_v2r8(rsq10);
1634             rinv11           = gmx_fjsp_invsqrt_v2r8(rsq11);
1635             rinv12           = gmx_fjsp_invsqrt_v2r8(rsq12);
1636             rinv20           = gmx_fjsp_invsqrt_v2r8(rsq20);
1637             rinv21           = gmx_fjsp_invsqrt_v2r8(rsq21);
1638             rinv22           = gmx_fjsp_invsqrt_v2r8(rsq22);
1639
1640             rinvsq00         = _fjsp_mul_v2r8(rinv00,rinv00);
1641             rinvsq01         = _fjsp_mul_v2r8(rinv01,rinv01);
1642             rinvsq02         = _fjsp_mul_v2r8(rinv02,rinv02);
1643             rinvsq10         = _fjsp_mul_v2r8(rinv10,rinv10);
1644             rinvsq11         = _fjsp_mul_v2r8(rinv11,rinv11);
1645             rinvsq12         = _fjsp_mul_v2r8(rinv12,rinv12);
1646             rinvsq20         = _fjsp_mul_v2r8(rinv20,rinv20);
1647             rinvsq21         = _fjsp_mul_v2r8(rinv21,rinv21);
1648             rinvsq22         = _fjsp_mul_v2r8(rinv22,rinv22);
1649
1650             fjx0             = _fjsp_setzero_v2r8();
1651             fjy0             = _fjsp_setzero_v2r8();
1652             fjz0             = _fjsp_setzero_v2r8();
1653             fjx1             = _fjsp_setzero_v2r8();
1654             fjy1             = _fjsp_setzero_v2r8();
1655             fjz1             = _fjsp_setzero_v2r8();
1656             fjx2             = _fjsp_setzero_v2r8();
1657             fjy2             = _fjsp_setzero_v2r8();
1658             fjz2             = _fjsp_setzero_v2r8();
1659
1660             /**************************
1661              * CALCULATE INTERACTIONS *
1662              **************************/
1663
1664             if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1665             {
1666
1667             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
1668
1669             /* REACTION-FIELD ELECTROSTATICS */
1670             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1671
1672             /* LENNARD-JONES DISPERSION/REPULSION */
1673
1674             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1675             vvdw6            = _fjsp_mul_v2r8(c6_00,rinvsix);
1676             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
1677             vvdw             = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
1678             fvdw             = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
1679
1680             d                = _fjsp_sub_v2r8(r00,rswitch);
1681             d                = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
1682             d2               = _fjsp_mul_v2r8(d,d);
1683             sw               = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
1684
1685             dsw              = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
1686
1687             /* Evaluate switch function */
1688             /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1689             fvdw             = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
1690             cutoff_mask      = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1691
1692             fscal            = _fjsp_add_v2r8(felec,fvdw);
1693
1694             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1695
1696             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1697
1698             /* Update vectorial force */
1699             fix0             = _fjsp_madd_v2r8(dx00,fscal,fix0);
1700             fiy0             = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1701             fiz0             = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1702             
1703             fjx0             = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1704             fjy0             = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1705             fjz0             = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1706
1707             }
1708
1709             /**************************
1710              * CALCULATE INTERACTIONS *
1711              **************************/
1712
1713             if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1714             {
1715
1716             /* REACTION-FIELD ELECTROSTATICS */
1717             felec            = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1718
1719             cutoff_mask      = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1720
1721             fscal            = felec;
1722
1723             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1724
1725             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1726
1727             /* Update vectorial force */
1728             fix0             = _fjsp_madd_v2r8(dx01,fscal,fix0);
1729             fiy0             = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1730             fiz0             = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1731             
1732             fjx1             = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1733             fjy1             = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1734             fjz1             = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1735
1736             }
1737
1738             /**************************
1739              * CALCULATE INTERACTIONS *
1740              **************************/
1741
1742             if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1743             {
1744
1745             /* REACTION-FIELD ELECTROSTATICS */
1746             felec            = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1747
1748             cutoff_mask      = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1749
1750             fscal            = felec;
1751
1752             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1753
1754             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1755
1756             /* Update vectorial force */
1757             fix0             = _fjsp_madd_v2r8(dx02,fscal,fix0);
1758             fiy0             = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1759             fiz0             = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1760             
1761             fjx2             = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1762             fjy2             = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1763             fjz2             = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1764
1765             }
1766
1767             /**************************
1768              * CALCULATE INTERACTIONS *
1769              **************************/
1770
1771             if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1772             {
1773
1774             /* REACTION-FIELD ELECTROSTATICS */
1775             felec            = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1776
1777             cutoff_mask      = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1778
1779             fscal            = felec;
1780
1781             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1782
1783             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1784
1785             /* Update vectorial force */
1786             fix1             = _fjsp_madd_v2r8(dx10,fscal,fix1);
1787             fiy1             = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1788             fiz1             = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1789             
1790             fjx0             = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1791             fjy0             = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1792             fjz0             = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1793
1794             }
1795
1796             /**************************
1797              * CALCULATE INTERACTIONS *
1798              **************************/
1799
1800             if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1801             {
1802
1803             /* REACTION-FIELD ELECTROSTATICS */
1804             felec            = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1805
1806             cutoff_mask      = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1807
1808             fscal            = felec;
1809
1810             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1811
1812             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1813
1814             /* Update vectorial force */
1815             fix1             = _fjsp_madd_v2r8(dx11,fscal,fix1);
1816             fiy1             = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1817             fiz1             = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1818             
1819             fjx1             = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1820             fjy1             = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1821             fjz1             = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1822
1823             }
1824
1825             /**************************
1826              * CALCULATE INTERACTIONS *
1827              **************************/
1828
1829             if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1830             {
1831
1832             /* REACTION-FIELD ELECTROSTATICS */
1833             felec            = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1834
1835             cutoff_mask      = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1836
1837             fscal            = felec;
1838
1839             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1840
1841             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1842
1843             /* Update vectorial force */
1844             fix1             = _fjsp_madd_v2r8(dx12,fscal,fix1);
1845             fiy1             = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1846             fiz1             = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1847             
1848             fjx2             = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1849             fjy2             = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1850             fjz2             = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1851
1852             }
1853
1854             /**************************
1855              * CALCULATE INTERACTIONS *
1856              **************************/
1857
1858             if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1859             {
1860
1861             /* REACTION-FIELD ELECTROSTATICS */
1862             felec            = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1863
1864             cutoff_mask      = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1865
1866             fscal            = felec;
1867
1868             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1869
1870             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1871
1872             /* Update vectorial force */
1873             fix2             = _fjsp_madd_v2r8(dx20,fscal,fix2);
1874             fiy2             = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1875             fiz2             = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1876             
1877             fjx0             = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1878             fjy0             = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1879             fjz0             = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1880
1881             }
1882
1883             /**************************
1884              * CALCULATE INTERACTIONS *
1885              **************************/
1886
1887             if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1888             {
1889
1890             /* REACTION-FIELD ELECTROSTATICS */
1891             felec            = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1892
1893             cutoff_mask      = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1894
1895             fscal            = felec;
1896
1897             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1898
1899             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1900
1901             /* Update vectorial force */
1902             fix2             = _fjsp_madd_v2r8(dx21,fscal,fix2);
1903             fiy2             = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1904             fiz2             = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1905             
1906             fjx1             = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1907             fjy1             = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1908             fjz1             = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1909
1910             }
1911
1912             /**************************
1913              * CALCULATE INTERACTIONS *
1914              **************************/
1915
1916             if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1917             {
1918
1919             /* REACTION-FIELD ELECTROSTATICS */
1920             felec            = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1921
1922             cutoff_mask      = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1923
1924             fscal            = felec;
1925
1926             fscal            = _fjsp_and_v2r8(fscal,cutoff_mask);
1927
1928             fscal            = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1929
1930             /* Update vectorial force */
1931             fix2             = _fjsp_madd_v2r8(dx22,fscal,fix2);
1932             fiy2             = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1933             fiz2             = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1934             
1935             fjx2             = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1936             fjy2             = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1937             fjz2             = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1938
1939             }
1940
1941             gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1942
1943             /* Inner loop uses 328 flops */
1944         }
1945
1946         /* End of innermost loop */
1947
1948         gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1949                                               f+i_coord_offset,fshift+i_shift_offset);
1950
1951         /* Increment number of inner iterations */
1952         inneriter                  += j_index_end - j_index_start;
1953
1954         /* Outer loop uses 18 flops */
1955     }
1956
1957     /* Increment number of outer iterations */
1958     outeriter        += nri;
1959
1960     /* Update outer/inner flops */
1961
1962     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*328);
1963 }